From 33702a07bd5e6c635458fd9e2b912a06fa52a8cb Mon Sep 17 00:00:00 2001 From: refai06 Date: Fri, 7 Feb 2025 22:08:23 +0530 Subject: [PATCH 1/7] Experimental-export-module-refactoring Signed-off-by: refai06 --- ...kspace_Creation_from_JupyterNotebook.ipynb | 10 +- .../TwoPartyWorkspaceCreation.ipynb | 8 +- .../workflow/interface/cli/workspace.py | 4 +- .../workflow/notebooktools/__init__.py | 8 + .../code_analyzer.py} | 442 +++++++----------- .../workflow/notebooktools/notebook_tools.py | 253 ++++++++++ .../workflow/runtime/federated_runtime.py | 6 +- .../workflow/workspace_export/__init__.py | 5 - 8 files changed, 449 insertions(+), 287 deletions(-) create mode 100644 openfl/experimental/workflow/notebooktools/__init__.py rename openfl/experimental/workflow/{workspace_export/export.py => notebooktools/code_analyzer.py} (51%) create mode 100644 openfl/experimental/workflow/notebooktools/notebook_tools.py delete mode 100644 openfl/experimental/workflow/workspace_export/__init__.py diff --git a/openfl-tutorials/experimental/workflow/1001_Workspace_Creation_from_JupyterNotebook.ipynb b/openfl-tutorials/experimental/workflow/1001_Workspace_Creation_from_JupyterNotebook.ipynb index ab31b26d50..525365a2a9 100644 --- a/openfl-tutorials/experimental/workflow/1001_Workspace_Creation_from_JupyterNotebook.ipynb +++ b/openfl-tutorials/experimental/workflow/1001_Workspace_Creation_from_JupyterNotebook.ipynb @@ -22,7 +22,7 @@ "##### High Level Overview of Methodology\n", "1. User annotates the relevant cells of the Jupyter notebook with `#| export` directive\n", "2. We then Leverage `nbdev` functionality to export these annotated cells of Jupyter notebook into a Python script\n", - "3. Utilize OpenFL experimental workflow module `WorkspaceExport` to convert the Python script into a OpenFL workspace\n", + "3. Utilize OpenFL experimental workflow module `NotebookTools` to convert the Python script into a OpenFL workspace\n", "4. 
User can utilize the experimental `fx` commands to deploy and run the federation seamlessly\n", "\n", "\n", @@ -946,7 +946,7 @@ "The following cells convert the Jupyter notebook into a Python script and create a Template Workspace that can be utilized by Aggregator based Workflow\n", "> NOTE: Only Notebook cells that were marked with `#| export` directive shall be included in this Python script\n", "\n", - "We first import `WorkspaceExport` module and execute `WorkspaceExport.export()` that converts the notebook and generates the template workspace. User is required to specify: \n", + "We first import `NotebookTools` module and execute `NotebookTools.export()` that converts the notebook and generates the template workspace. User is required to specify: \n", "1. `notebook_path`: path of the Jupyter notebook that is required to be converted\n", "2. `output_workspace`: path where the converted workspace is stored" ] @@ -959,9 +959,9 @@ "outputs": [], "source": [ "import os\n", - "from openfl.experimental.workflow.workspace_export import WorkspaceExport\n", + "from openfl.experimental.workflow.notebooktools import NotebookTools\n", "\n", - "WorkspaceExport.export(\n", + "NotebookTools.export(\n", " notebook_path='./1001_Workspace_Creation_from_JupyterNotebook.ipynb',\n", " output_workspace=f\"/home/{os.environ['USER']}/generated-workspace\"\n", ")" @@ -1065,7 +1065,7 @@ ], "metadata": { "kernelspec": { - "display_name": "fed_run", + "display_name": "dir_workspace_3.10", "language": "python", "name": "python3" }, diff --git a/openfl-tutorials/experimental/workflow/Vertical_FL/TwoPartyWorkspaceCreation.ipynb b/openfl-tutorials/experimental/workflow/Vertical_FL/TwoPartyWorkspaceCreation.ipynb index 1395a5095a..1cfc63c5f3 100644 --- a/openfl-tutorials/experimental/workflow/Vertical_FL/TwoPartyWorkspaceCreation.ipynb +++ b/openfl-tutorials/experimental/workflow/Vertical_FL/TwoPartyWorkspaceCreation.ipynb @@ -20,7 +20,7 @@ "##### High Level Overview of Methodology\n", "1. 
User annotates the relevant cells of the Jupyter notebook with `#| export` directive\n", "2. We then Leverage `nbdev` functionality to export these annotated cells of Jupyter notebook into a Python script\n", - "3. Utilize OpenFL experimental workflow module `WorkspaceExport` to convert the Python script into a OpenFL workspace\n", + "3. Utilize OpenFL experimental workflow module `NotebookTools` to convert the Python script into a OpenFL workspace\n", "4. User can utilize the experimental `fx` commands to deploy and run the federation seamlessly\n", "\n", "\n", @@ -340,7 +340,7 @@ "The following cells convert the Jupyter notebook into a Python script and create a Template Workspace that can be utilized by Aggregator based Workflow\n", "> NOTE: Only Notebook cells that were marked with `#| export` directive shall be included in this Python script\n", "\n", - "We first import `WorkspaceExport` module and execute `WorkspaceExport.export()` that converts the notebook and generates the template workspace. User is required to specify: \n", + "We first import `NotebookTools` module and execute `NotebookTools.export()` that converts the notebook and generates the template workspace. User is required to specify: \n", "1. `notebook_path`: path of the Jupyter notebook that is required to be converted\n", "2. 
`output_workspace`: path where the converted workspace is stored" ] @@ -353,9 +353,9 @@ "outputs": [], "source": [ "import os\n", - "from openfl.experimental.workflow.workspace_export import WorkspaceExport\n", + "from openfl.experimental.workflow.notebooktools import NotebookTools\n", "\n", - "WorkspaceExport.export(\n", + "NotebookTools.export(\n", " notebook_path='./TwoPartyWorkspaceCreation.ipynb',\n", " output_workspace=f\"/home/{os.environ['USER']}/generated-workspace\"\n", ")" diff --git a/openfl/experimental/workflow/interface/cli/workspace.py b/openfl/experimental/workflow/interface/cli/workspace.py index fb38786b46..07e3e4a5e4 100644 --- a/openfl/experimental/workflow/interface/cli/workspace.py +++ b/openfl/experimental/workflow/interface/cli/workspace.py @@ -30,7 +30,7 @@ print_tree, ) from openfl.experimental.workflow.interface.cli.plan import freeze_plan -from openfl.experimental.workflow.workspace_export import WorkspaceExport +from openfl.experimental.workflow.notebooktools import NotebookTools from openfl.utilities.path_check import is_directory_traversal from openfl.utilities.utils import rmtree from openfl.utilities.workspace import dump_requirements_file @@ -138,7 +138,7 @@ def create_(prefix, custom_template, template, notebook, template_output_dir): + "save your Jupyter Notebook workspace." 
) - WorkspaceExport.export( + NotebookTools.export( notebook_path=notebook, output_workspace=template_output_dir, ) diff --git a/openfl/experimental/workflow/notebooktools/__init__.py b/openfl/experimental/workflow/notebooktools/__init__.py new file mode 100644 index 0000000000..41d1e84ced --- /dev/null +++ b/openfl/experimental/workflow/notebooktools/__init__.py @@ -0,0 +1,8 @@ +# Copyright 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +"""openfl.experimental.workflow.notebooktools package.""" + +from openfl.experimental.workflow.notebooktools.code_analyzer import CodeAnalyzer +from openfl.experimental.workflow.notebooktools.notebook_tools import NotebookTools diff --git a/openfl/experimental/workflow/workspace_export/export.py b/openfl/experimental/workflow/notebooktools/code_analyzer.py similarity index 51% rename from openfl/experimental/workflow/workspace_export/export.py rename to openfl/experimental/workflow/notebooktools/code_analyzer.py index 14750de2ff..5c559ecfd3 100644 --- a/openfl/experimental/workflow/workspace_export/export.py +++ b/openfl/experimental/workflow/notebooktools/code_analyzer.py @@ -1,82 +1,43 @@ # Copyright 2020-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 - -"""Workspace Export module.""" - import ast -import importlib import inspect import re -import shutil import sys +from importlib import import_module from logging import getLogger from pathlib import Path -from shutil import copytree -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import nbformat -import yaml from nbdev.export import nb_export -from openfl.experimental.workflow.federated.plan import Plan -from openfl.experimental.workflow.interface.cli.cli_helper import print_tree - logger = getLogger(__name__) -class WorkspaceExport: - """Convert a LocalRuntime Jupyter Notebook to Aggregator based - FederatedRuntime Workflow. 
+class CodeAnalyzer: + """Code transforamtion and analysis functionality for NotebookTools Attributes: - notebook_path: Absolute path of jupyter notebook. - template_workspace_path: Path to template workspace provided with - OpenFL. - output_workspace_path: Output directory for new generated workspace - (default="/tmp"). + script_path: Absoluet path to python script. + script_name: Name of the python script. """ - def __init__(self, notebook_path: str, output_workspace: str) -> None: - """Initialize a WorkspaceExport object. + def __init__(self) -> None: + """Initialize CodeTransformer""" + self.script_path = None + self.script_name = None + + def _initialize_script(self, notebook_path: Path, output_path: Path) -> None: + """Initialize and process the script from notebook Args: - notebook_path (str): Path to Jupyter notebook. - output_workspace (str): Path to output_workspace to be - generated. + notebook_path (str): Path to Jupyter notebook. + output_workspace (str): Path to output_workspace to be + generated. 
""" - - self.notebook_path = Path(notebook_path).resolve() - # Check if the Jupyter notebook exists - if not self.notebook_path.exists() or not self.notebook_path.is_file(): - raise FileNotFoundError(f"The Jupyter notebook at {notebook_path} does not exist.") - - self.output_workspace_path = Path(output_workspace).resolve() - # Regenerate the workspace if it already exists - if self.output_workspace_path.exists(): - shutil.rmtree(self.output_workspace_path) - self.output_workspace_path.parent.mkdir(parents=True, exist_ok=True) - - self.template_workspace_path = ( - Path(f"{__file__}") - .parent.parent.parent.parent.parent.joinpath( - "openfl-workspace", - "experimental", - "workflow", - "AggregatorBasedWorkflow", - "template_workspace", - ) - .resolve(strict=True) - ) - - # Copy template workspace to output directory - self.created_workspace_path = Path( - copytree(self.template_workspace_path, self.output_workspace_path) - ) - logger.info(f"Copied template workspace to {self.created_workspace_path}") - - logger.info("Converting jupter notebook to python script...") - export_filename = self.__get_exp_name() + export_filename = self.__get_exp_name(notebook_path) if export_filename is None: raise NameError( "Please include `#| default_exp ` in " @@ -84,24 +45,19 @@ def __init__(self, notebook_path: str, output_workspace: str) -> None: ) self.script_path = Path( self.__convert_to_python( - self.notebook_path, - self.created_workspace_path.joinpath("src"), + notebook_path, + output_path.joinpath("src"), f"{export_filename}.py", ) ).resolve() - - # Generated python script name without .py extension self.script_name = self.script_path.name.split(".")[0].strip() - # Comment flow.run() so when script is imported flow does not start - # executing - self.__comment_flow_execution() - # This is required as Ray created actors too many actors when - # backend="ray" # NOQA - self.__change_runtime() - - def __get_exp_name(self) -> None: - """Fetch the experiment name from the 
Jupyter notebook.""" - with open(str(self.notebook_path), "r") as f: + + def __get_exp_name(self, notebook_path: Path) -> None: + """Fetch the experiment name from the Jupyter notebook. + Args: + notebook_path (str): Path to Jupyter notebook. + """ + with open(str(notebook_path), "r") as f: notebook_content = nbformat.read(f, as_version=nbformat.NO_CONVERT) for cell in notebook_content.cells: @@ -115,46 +71,41 @@ def __get_exp_name(self) -> None: def __convert_to_python(self, notebook_path: Path, output_path: Path, export_filename) -> Path: """Converts a Jupyter notebook to a Python script. - Args: notebook_path (Path): The path to the Jupyter notebook file to be converted. output_path (Path): The directory where the exported Python script should be saved. export_filename: The name of the exported Python script file. + + Returns: + Path: The path to the exported Python script file. """ nb_export(notebook_path, output_path) return Path(output_path).joinpath(export_filename).resolve() - def __comment_flow_execution(self) -> None: - """In the python script search for ".run()" and comment it.""" - with open(self.script_path, "r") as f: - data = f.readlines() - for idx, line in enumerate(data): - if ".run()" in line: - data[idx] = f"# {line}" - with open(self.script_path, "w") as f: - f.writelines(data) - - def __change_runtime(self) -> None: - """Change the LocalRuntime backend from ray to single_process.""" - with open(self.script_path, "r") as f: - data = f.read() - - if "backend='ray'" in data or 'backend="ray"' in data: - data = data.replace("backend='ray'", "backend='single_process'").replace( - 'backend="ray"', 'backend="single_process"' - ) + def __import_exported_script(self) -> None: + """ + Imports the generated python script using the importlib module + """ + try: + sys.path.append(str(self.script_path.parent)) + self.exported_script_module = import_module(self.script_name) + self.available_modules_in_exported_script = dir(self.exported_script_module) - with 
open(self.script_path, "w") as f: - f.write(data) + except ImportError as e: + logger.error(f"Failed to import script {self.script_name}: {e}") + raise def __get_class_arguments(self, class_name) -> list: """Given the class name returns expected class arguments. Args: - class_name (str): Name of the class + class_name (str): The name of the class. + + Returns: + list: A list of expected class arguments. """ # Import python script if not already if not hasattr(self, "exported_script_module"): @@ -192,9 +143,13 @@ def __get_class_name_and_sourcecode_from_parent_class( ) -> Optional[Tuple[Optional[str], Optional[str]]]: """Provided the parent_class name returns derived class source code and name. - Args: - parent_class: FLSpec instance + parent_class: FLSpec instance. + + Returns: + Optional[Tuple[Optional[str], Optional[str]]]: + The source code of the derived class (str). + The name of the derived class (str). """ # Import python script if not already if not hasattr(self, "exported_script_module"): @@ -209,11 +164,13 @@ def __get_class_name_and_sourcecode_from_parent_class( return None, None def __extract_class_initializing_args(self, class_name) -> Dict[str, Any]: - """Provided name of the class returns expected arguments and its - values in the form of a dictionary. - + """Provided name of the class returns expected arguments and it's + values in form of dictionary. Args: - class_name (str): Name of the class + class_name (str): The name of the class. + + Returns: + Dict[str, Any]: A dictionary containing the expected arguments and their values. 
""" instantiation_args = {"args": {}, "kwargs": {}} @@ -262,83 +219,15 @@ def _clean_value(self, value: str) -> str: value = value.lstrip("[").rstrip("]") return value - def __import_exported_script(self) -> None: - """ - Imports generated python script with help of importlib - """ - - sys.path.append(str(self.script_path.parent)) - self.exported_script_module = importlib.import_module(self.script_name) - self.available_modules_in_exported_script = dir(self.exported_script_module) - - def __read_yaml(self, path) -> None: - with open(path, "r") as y: - return yaml.safe_load(y) - - def __write_yaml(self, path, data) -> None: - with open(path, "w") as y: - yaml.safe_dump(data, y) - - @classmethod - def export_federated( - cls, notebook_path: str, output_workspace: str, director_fqdn: str, tls: bool = False - ) -> Tuple[str, str]: - """Exports workspace for FederatedRuntime. - - Args: - notebook_path (str): Path to the Jupyter notebook. - output_workspace (str): Path for the generated workspace directory. - director_fqdn (str): Fully qualified domain name of the director node. - tls (bool, optional): Whether to use TLS for the connection. - - Returns: - Tuple[str, str]: A tuple containing: - (archive_path, flow_class_name). - """ - instance = cls(notebook_path, output_workspace) - instance.generate_requirements() - instance.generate_plan_yaml(director_fqdn, tls) - instance._clean_generated_workspace() - print_tree(output_workspace, level=2) - return instance.generate_experiment_archive() - - @classmethod - def export(cls, notebook_path: str, output_workspace: str) -> None: - """Exports workspace to output_workspace. - - Args: - notebook_path (str): Path to the Jupyter notebook. - output_workspace (str): Path for the generated workspace directory. 
- """ - instance = cls(notebook_path, output_workspace) - instance.generate_requirements() - instance.generate_plan_yaml() - instance.generate_data_yaml() - print_tree(output_workspace, level=2) - - def generate_experiment_archive(self) -> Tuple[str, str]: - """ - Create archive of the generated workspace + def get_requirements(self) -> Tuple[List[str], List[int], List[str]]: + """Extract pip libraries from the script Returns: - Tuple[str, str]: A tuple containing: - (generated_workspace_path, archive_path, flow_class_name). + tuple: A tuple containing: + requirements (list of str): List of pip libraries found in the script. + line_nos (list of int): List of line numbers where "pip install" commands are found. + data (list of str): The entire script data as a list of lines. """ - parent_directory = self.output_workspace_path.parent - archive_path = parent_directory / "experiment" - - # Create a ZIP archive of the generated_workspace directory - arch_path = shutil.make_archive(str(archive_path), "zip", str(self.output_workspace_path)) - - print(f"Archive created at {archive_path}.zip") - - return arch_path, self.flow_class_name - - # Have to do generate_requirements before anything else - # because these !pip commands needs to be removed from python script - def generate_requirements(self) -> None: - """Finds pip libraries mentioned in exported python script and append - in workspace/requirements.txt.""" data = None with open(self.script_path, "r") as f: requirements = [] @@ -353,133 +242,135 @@ def generate_requirements(self) -> None: if not line.startswith("#") and "-r" not in line and "openfl.git" not in line: requirements.append(f"{line.split(' ')[-1].strip()}\n") - requirements_filepath = str( - self.created_workspace_path.joinpath("requirements.txt").resolve() - ) - - # Write libraries found in requirements.txt - with open(requirements_filepath, "a") as f: - f.writelines(requirements) + return requirements, line_nos, data - # Delete pip requirements from 
python script - # if not we won't be able to import python script. + def remove_lines(self, data: List[str], line_nos: List[int]) -> None: + """Removes pip install lines from the script + Args: + data (List[str]): The entire script data as a list of lines. + line_nos (List[int]): List of line numbers where "pip install" commands are found. + """ with open(self.script_path, "w") as f: for i, line in enumerate(data): if i not in line_nos: f.write(line) - def _clean_generated_workspace(self) -> None: + def get_flow_class_details(self, parent_class) -> Dict[str, Any]: """ - Remove cols.yaml and data.yaml from the generated workspace - as these are not needed in FederatedRuntime (Director based workflow) + Retrieves details of a flow class that inherits from the given parent clas + Args: + parent_class: The parent class (FLSpec instance). + Returns: + Dict[str, Any]: A dictionary containing: + flow_class_name (str): The name of the flow class. + expected_args (List[str]): The expected arguments for the flow class. + init_args (Dict[str, Any]): The initialization arguments for the flow class. 
""" - cols_file = self.output_workspace_path.joinpath("plan", "cols.yaml") - data_file = self.output_workspace_path.joinpath("plan", "data.yaml") + _, flow_class_name = self.__get_class_name_and_sourcecode_from_parent_class(parent_class) + if not flow_class_name: + raise ValueError("No flow class found that inherits from FLSpec") - if cols_file.exists(): - cols_file.unlink() - if data_file.exists(): - data_file.unlink() + # Get expected arguments + expected_arguments = self.__get_class_arguments(flow_class_name) - def generate_plan_yaml(self, director_fqdn: str = None, tls: bool = False) -> None: - """ - Generates plan.yaml + # get initialization arguments + init_args = self.__extract_class_initializing_args(flow_class_name) + + return { + "flow_class_name": flow_class_name, + "expected_args": expected_arguments, + "init_args": init_args, + } + def analyze_flow_configuration(self, flow_details: Dict[str, Any]) -> Dict[str, Any]: + """Analyze flow configuration from flow details. Args: - director_fqdn (str): Fully qualified domain name of the director node. - tls (bool, optional): Whether to use TLS for the connection. + flow_details (Dict[str, Any]): Dictionary containing flow class details. 
+ + Returns: + Dict[str, Any]: Dictionary containing the plan configuration """ - flspec = importlib.import_module("openfl.experimental.workflow.interface").FLSpec - # Get flow classname - _, self.flow_class_name = self.__get_class_name_and_sourcecode_from_parent_class(flspec) - # Get expected arguments of flow class - self.flow_class_expected_arguments = self.__get_class_arguments(self.flow_class_name) - # Get provided arguments to flow class - self.arguments_passed_to_initialize = self.__extract_class_initializing_args( - self.flow_class_name - ) - - plan = self.created_workspace_path.joinpath("plan", "plan.yaml").resolve() - data = self.__read_yaml(plan) - if data is None: - data = {} - data["federated_flow"] = {"settings": {}, "template": ""} - - data["federated_flow"]["template"] = f"src.{self.script_name}.{self.flow_class_name}" - - def update_dictionary(args: dict, data: dict, dtype: str = "args"): + flow_config = { + "federated_flow": { + "settings": {}, + "template": f"src.{self.script_name}.{flow_details['flow_class_name']}", + } + } + + def update_dictionary(args: dict, dtype: str = "args") -> None: + """Update plan configuration with argument values. 
+ + Args: + args: Dictionary of arguments to process + dtype: Type of arguments ('args' or 'kwargs') + """ for idx, (k, v) in enumerate(args.items()): if dtype == "args": v = getattr(self.exported_script_module, str(k), None) - if v is not None and type(v) not in (int, str, bool): + if v is not None and not isinstance(v, (int, str, bool)): v = f"src.{self.script_name}.{k}" - k = self.flow_class_expected_arguments[idx] + k = flow_details["expected_args"][idx] elif dtype == "kwargs": - if v is not None and type(v) not in (int, str, bool): - v = f"src.{self.script_name}.{k}" - data["federated_flow"]["settings"].update({k: v}) - - # Find positional arguments of flow class and it's values - pos_args = self.arguments_passed_to_initialize["args"] - update_dictionary(pos_args, data, dtype="args") - # Find kwargs of flow class and it's values - kw_args = self.arguments_passed_to_initialize["kwargs"] - update_dictionary(kw_args, data, dtype="kwargs") - - # Updating the aggregator address with director's hostname and tls settings in plan.yaml - if director_fqdn: - network_settings = Plan.parse(plan).config["network"] - data["network"] = network_settings - data["network"]["settings"]["agg_addr"] = director_fqdn - data["network"]["settings"]["tls"] = tls - - self.__write_yaml(plan, data) - - def generate_data_yaml(self) -> None: - """Generates data.yaml.""" - # Import python script if not already + if v is not None and not isinstance(v, (int, str, bool)): + v = f"src.{self.script_name}.{v}" + flow_config["federated_flow"]["settings"].update({k: v}) + + # Process arguments + pos_args = flow_details["init_args"].get("args", {}) + update_dictionary(pos_args, "args") + kw_args = flow_details["init_args"].get("kwargs", {}) + update_dictionary(kw_args, "kwargs") + + return flow_config + + def get_runtime_info(self, flow_class_name: str) -> Tuple[object, str]: + """Get federated flow class and runtime information. 
+ Args: + flow_class_name (str): The name of the federated flow class to retrieve. + + Returns: + tuple: A tuple containing the runtime instance and the flow class name. + """ if not hasattr(self, "exported_script_module"): self.__import_exported_script() - self._find_flow_class_name_if_needed() - # Import flow class - federated_flow_class = getattr(self.exported_script_module, self.flow_class_name) - + federated_flow_class = getattr(self.exported_script_module, flow_class_name) flow_name, runtime = self._find_runtime_instance(federated_flow_class) - data_yaml = self.created_workspace_path.joinpath("plan", "data.yaml").resolve() - data = self._read_or_initialize_yaml(data_yaml) - runtime_name = "runtime_local" - runtime_created = self._process_aggregator(runtime, data, flow_name, runtime_name) - self._process_collaborators(runtime, data, flow_name, runtime_created, runtime_name) - self.__write_yaml(data_yaml, data) - - def _find_flow_class_name_if_needed(self): - """Find the flow class name if not already found.""" - if not hasattr(self, "flow_class_name"): - flspec = importlib.import_module("openfl.experimental.workflow.interface").FLSpec - _, self.flow_class_name = self.__get_class_name_and_sourcecode_from_parent_class(flspec) - - def _find_runtime_instance(self, federated_flow_class): - """Find the runtime instance.""" + return runtime, flow_name + + def _find_runtime_instance(self, federated_flow_class) -> Tuple[str, object]: + """Find runtime instance + Args: + federated_flow_class: The class object of the federated flow. + + Returns: + tuple: A tuple containing the name of the flow instance and the runtime instance. 
+ """ for t in self.available_modules_in_exported_script: + tempstring = t t = getattr(self.exported_script_module, t) if isinstance(t, federated_flow_class): + flow_name = tempstring if not hasattr(t, "_runtime"): raise AttributeError("Unable to locate LocalRuntime instantiation") runtime = t._runtime if not hasattr(runtime, "collaborators"): raise AttributeError("LocalRuntime instance does not have collaborators") - return runtime + return flow_name, runtime raise AttributeError("Runtime instance not found") - def _read_or_initialize_yaml(self, data_yaml): - """Read or initialize the YAML data.""" - data = self.__read_yaml(data_yaml) - return data if data is not None else {} + def process_aggregator(self, runtime, data, flow_name, runtime_name) -> bool: + """Process the aggregator details. + Args: + runtime (Any): The runtime instance containing the aggregator. + data (Dict[str, Any]): The data dictionary to be updated with aggregator details. + flow_name (str): The name of the flow. + runtime_name (str): The name of the runtime. - def _process_aggregator(self, runtime, data, flow_name, runtime_name): - """Process the aggregator details.""" + Returns: + bool: A boolean indicating whether the runtime was created. + """ aggregator = runtime._aggregator runtime_created = False private_attrs_callable = aggregator.private_attributes_callable @@ -516,13 +407,26 @@ def _process_aggregator(self, runtime, data, flow_name, runtime_name): } return runtime_created - def _process_collaborators(self, runtime, data, flow_name, runtime_created, runtime_name): - """Process the collaborators.""" + def process_collaborators( + self, runtime, data, flow_name, runtime_created, runtime_name + ) -> Dict[str, Any]: + """Process the collaborators. + Args: + runtime (Any): The runtime instance containing the collaborators. + data (Dict[str, Any]): The data dictionary to be updated with collaborator details. + flow_name (str): The name of the flow. 
+ runtime_created (bool): Flag indicating if the runtime has been created. + runtime_name (str): The name of the runtime. + + Returns: + Dict[str, Any]: The updated data dictionary with collaborator details. + """ collaborators = runtime._LocalRuntime__collaborators arguments_passed_to_initialize = self.__extract_class_initializing_args("Collaborator")[ "kwargs" ] runtime_collab_created = False + for collab in collaborators.values(): collab_name = collab.get_name() callable_func = collab.private_attributes_callable @@ -559,3 +463,5 @@ def _process_collaborators(self, runtime, data, flow_name, runtime_created, runt data[collab_name] = { "private_attributes": f"src.{self.script_name}.{collab_name}_private_attributes" } + + return data diff --git a/openfl/experimental/workflow/notebooktools/notebook_tools.py b/openfl/experimental/workflow/notebooktools/notebook_tools.py new file mode 100644 index 0000000000..d2480ad88c --- /dev/null +++ b/openfl/experimental/workflow/notebooktools/notebook_tools.py @@ -0,0 +1,253 @@ +# Copyright 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Notebook Tools module.""" + +import shutil +from importlib import import_module +from logging import getLogger +from pathlib import Path +from shutil import copytree +from typing import Tuple + +import yaml + +from openfl.experimental.workflow.federated.plan import Plan +from openfl.experimental.workflow.interface.cli.cli_helper import print_tree +from openfl.experimental.workflow.notebooktools.code_analyzer import CodeAnalyzer + +logger = getLogger(__name__) + + +class NotebookTools: + """The class is responsible for converting workflow API + into an OpenFL workspace + + Attributes: + notebook_path: Absolute path of jupyter notebook. + template_workspace_path: Path to template workspace provided with + OpenFL. + output_workspace_path: Output directory for new generated workspace + (default="/tmp"). 
+ """ + + def __init__(self, notebook_path: str, output_workspace: str) -> None: + """Initialize a NotebookTools object. + Args: + notebook_path (str): The path to the Jupyter notebook that needs to be converted. + output_workspace (str): The directory where the converted workspace will be saved + workspace + """ + self.notebook_path = Path(notebook_path).resolve() + # Check if the Jupyter notebook exists + if not self.notebook_path.exists() or not self.notebook_path.is_file(): + raise FileNotFoundError(f"The Jupyter notebook at {notebook_path} does not exist.") + + self.output_workspace_path = Path(output_workspace).resolve() + # Regenerate the workspace if it already exists + if self.output_workspace_path.exists(): + shutil.rmtree(self.output_workspace_path) + self.output_workspace_path.parent.mkdir(parents=True, exist_ok=True) + + self.template_workspace_path = ( + Path(f"{__file__}") + .parent.parent.parent.parent.parent.joinpath( + "openfl-workspace", + "experimental", + "workflow", + "AggregatorBasedWorkflow", + "template_workspace", + ) + .resolve(strict=True) + ) + + # Copy template workspace to output directory + copytree(self.template_workspace_path, self.output_workspace_path) + + logger.info(f"Copied template workspace to {self.output_workspace_path}") + + # Initialize CodeAnalyzer object + self.code_analyzer = CodeAnalyzer() + # Initialize the script with in the CodeAnalyzer + self.code_analyzer._initialize_script(self.notebook_path, self.output_workspace_path) + + @classmethod + def export_federated( + cls, notebook_path: str, output_workspace: str, director_fqdn: str, tls: bool = False + ) -> Tuple[str, str]: + """Exports workspace for FederatedRuntime. + + Args: + notebook_path (str): Path to the Jupyter notebook. + output_workspace (str): Path for the generated workspace directory. + director_fqdn (str): Fully qualified domain name of the director node. + tls (bool, optional): Whether to use TLS for the connection. 
+ + Returns: + Tuple[str, str]: A tuple containing: + (archive_path, flow_class_name). + """ + instance = cls(notebook_path, output_workspace) + instance.generate_requirements() + instance.generate_plan_yaml(director_fqdn, tls) + instance._clean_generated_workspace() + print_tree(output_workspace, level=2) + + @classmethod + def export(cls, notebook_path: str, output_workspace: str) -> None: + """Exports workspace to output_workspace. + Args: + notebook_path (str): Path to the Jupyter notebook. + output_workspace (str): Path for the generated workspace directory. + """ + instance = cls(notebook_path, output_workspace) + instance.generate_requirements() + instance.generate_plan_yaml() + instance.generate_data_yaml() + print_tree(output_workspace, level=2) + + def generate_requirements(self) -> None: + """Extracts pip libraries mentioned in exported python script and append + in workspace/requirements.txt. + """ + requirements, line_numbers, data = self.code_analyzer.get_requirements() + + requirements_filepath = str( + self.output_workspace_path.joinpath("requirements.txt").resolve() + ) + + # Write libraries found in requirements.txt + with open(requirements_filepath, "a") as f: + f.writelines(requirements) + + # Delete pip requirements from the python script to ensure it can be imported + self.code_analyzer.remove_lines(data, line_numbers) + + def _clean_generated_workspace(self) -> None: + """ + Remove cols.yaml and data.yaml from the generated workspace + as these are not needed in FederatedRuntime (Director based workflow) + + """ + cols_file = self.output_workspace_path.joinpath("plan", "cols.yaml") + data_file = self.output_workspace_path.joinpath("plan", "data.yaml") + + if cols_file.exists(): + cols_file.unlink() + if data_file.exists(): + data_file.unlink() + + def __read_yaml(self, path) -> dict: + """Reads a YAML file and returns its contents. + Args: + path (str): The path to the YAML file. + + Returns: + dict: The contents of the YAML file. 
+ """ + with open(path, "r") as y: + return yaml.safe_load(y) + + def __write_yaml(self, path, data) -> None: + """Writes data to a YAML file. + Args: + path (str): The path to the YAML file. + data (dict): The data to write to the YAML file. + """ + with open(path, "w") as y: + yaml.safe_dump(data, y) + + def generate_plan_yaml(self, director_fqdn: str = None, tls: bool = False) -> None: + """Generate the plan.yaml file containing the federated learning flow configuration + Args: + director_fqdn (str): Fully qualified domain name of the director node. + tls (bool, optional): Whether to use TLS for the connection. + """ + flspec = import_module("openfl.experimental.workflow.interface").FLSpec + # Get the flow_class details + flow_details = self.code_analyzer.get_flow_class_details(flspec) + # Analyze and generate plan configuration + flow_config = self.code_analyzer.analyze_flow_configuration(flow_details) + + # Determine the path for the plan.yaml file + plan = self.output_workspace_path.joinpath("plan", "plan.yaml").resolve() + + ## Read or initialize the YAML data + data = self._read_or_initialize_plan_yaml(plan) + + # Update the plan_configuration with the analyzed flow configuration + data["federated_flow"].update(flow_config["federated_flow"]) + + # Updating the aggregator address with director's hostname and tls settings in plan.yaml + if director_fqdn: + network_settings = Plan.parse(plan).config["network"] + data["network"] = network_settings + data["network"]["settings"]["agg_addr"] = director_fqdn + data["network"]["settings"]["tls"] = tls + + # Write the updated plan configuraiton to the plan.yaml file + self.__write_yaml(plan, data) + + def generate_data_yaml(self) -> None: + """Generate data.yaml with runtime configuration""" + # Ensure flow_class is available + flow_class_name = self._ensure_flow_class() + + # Get runtime information using CodeAnalyzer + runtime, flow_name = self.code_analyzer.get_runtime_info(flow_class_name) + + # Determine the 
path for the data.yaml + data_yaml = self.output_workspace_path.joinpath("plan", "data.yaml").resolve() + + # Read or initialize the YAML data + data = self._read_or_initialize_data_yaml(data_yaml) + + # Initiaize runtime name + runtime_name = "local_runtime" + + # Process aggregator information using CodeAnalyzer + runtime_created = self.code_analyzer.process_aggregator( + runtime, data, flow_name, runtime_name + ) + + # Process collaborator information using CodeAnalyzer + data = self.code_analyzer.process_collaborators( + runtime, data, flow_name, runtime_created, runtime_name + ) + + # Write updated data configuration to the data.yaml file + self.__write_yaml(data_yaml, data) + + def _ensure_flow_class(self) -> str: + """Ensure flow class is available and returns its name""" + if not hasattr(self, "flow_class_name"): + flspsec = import_module("openfl.experimental.workflow.interface").FLSpec + flow_details = self.code_analyzer.get_flow_class_details(flspsec) + self.flow_class_name = flow_details["flow_class_name"] + + return self.flow_class_name + + def _read_or_initialize_plan_yaml(self, plan_yaml) -> dict: + """Read or initialize the plan YAML data. + Args: + plan_yaml (Path): The path to the plan.yaml file. + + Returns: + dict: The data dictionary from plan.yaml. + """ + data = self.__read_yaml(plan_yaml) + if data is None: + data = {} + data["federated_flow"] = {"settings": {}, "template": ""} + return data + + def _read_or_initialize_data_yaml(self, data_yaml) -> dict: + """Read or initialize the YAML data. + Args: + data_yaml (Path): The path to the data.yaml file. 
+ + Returns: + dict: The data dictionary from data.yaml + """ + data = self.__read_yaml(data_yaml) + return data if data is not None else {} diff --git a/openfl/experimental/workflow/runtime/federated_runtime.py b/openfl/experimental/workflow/runtime/federated_runtime.py index 861c27e059..a4bc091ed5 100644 --- a/openfl/experimental/workflow/runtime/federated_runtime.py +++ b/openfl/experimental/workflow/runtime/federated_runtime.py @@ -16,9 +16,9 @@ import dill from tabulate import tabulate +from openfl.experimental.workflow.notebooktools import NotebookTools from openfl.experimental.workflow.runtime.runtime import Runtime from openfl.experimental.workflow.transport.grpc.director_client import DirectorClient -from openfl.experimental.workflow.workspace_export import WorkspaceExport logger = logging.getLogger(__name__) @@ -140,13 +140,13 @@ def _create_director_client(self) -> DirectorClient: def prepare_workspace_archive(self) -> Tuple[Path, str]: """ - Prepare workspace archive using WorkspaceExport. + Prepare workspace archive using NotebookTools. Returns: Tuple[Path, str]: A tuple containing the path of the created archive and the experiment name. 
""" - archive_path, exp_name = WorkspaceExport.export_federated( + archive_path, exp_name = NotebookTools.export_federated( notebook_path=self.notebook_path, output_workspace="./generated_workspace", director_fqdn=self.director["director_node_fqdn"], diff --git a/openfl/experimental/workflow/workspace_export/__init__.py b/openfl/experimental/workflow/workspace_export/__init__.py deleted file mode 100644 index f947a0fc36..0000000000 --- a/openfl/experimental/workflow/workspace_export/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -from openfl.experimental.workflow.workspace_export.export import WorkspaceExport From 259dcfc1160ac9604faf97a475e15703f37d8ef0 Mon Sep 17 00:00:00 2001 From: refai06 Date: Tue, 11 Feb 2025 15:57:12 +0530 Subject: [PATCH 2/7] Incorporated comments Signed-off-by: refai06 --- .../workflow/notebooktools/__init__.py | 2 +- .../workflow/notebooktools/code_analyzer.py | 85 +++++++----- .../workflow/notebooktools/notebook_tools.py | 123 +++++++++--------- 3 files changed, 118 insertions(+), 92 deletions(-) diff --git a/openfl/experimental/workflow/notebooktools/__init__.py b/openfl/experimental/workflow/notebooktools/__init__.py index 41d1e84ced..1f29ec701d 100644 --- a/openfl/experimental/workflow/notebooktools/__init__.py +++ b/openfl/experimental/workflow/notebooktools/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020-2024 Intel Corporation +# Copyright 2020-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/openfl/experimental/workflow/notebooktools/code_analyzer.py b/openfl/experimental/workflow/notebooktools/code_analyzer.py index 5c559ecfd3..1c0a086c45 100644 --- a/openfl/experimental/workflow/notebooktools/code_analyzer.py +++ b/openfl/experimental/workflow/notebooktools/code_analyzer.py @@ -1,4 +1,4 @@ -# Copyright 2020-2024 Intel Corporation +# Copyright 2020-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import ast @@ -17,32 
+17,30 @@ class CodeAnalyzer: - """Code transforamtion and analysis functionality for NotebookTools + """Code analysis and transformation functionality for NotebookTools Attributes: - script_path: Absoluet path to python script. + script_path: Absolute path to python script. script_name: Name of the python script. """ - def __init__(self) -> None: - """Initialize CodeTransformer""" + def __init__(self, notebook_path: Path, output_path: Path) -> None: + """Initialize CodeAnalzer and process the script from notebook - self.script_path = None - self.script_name = None - - def _initialize_script(self, notebook_path: Path, output_path: Path) -> None: - """Initialize and process the script from notebook Args: - notebook_path (str): Path to Jupyter notebook. - output_workspace (str): Path to output_workspace to be - generated. + notebook_path (Path): The path to the Jupyter notebook that needs to be converted. + output_path (Path): The directory where the converted Python script will be saved. """ + logger.info("Converting jupter notebook to python script...") + + # Extract the export filename from the notebook export_filename = self.__get_exp_name(notebook_path) if export_filename is None: raise NameError( "Please include `#| default_exp ` in " "the first cell of the notebook." ) + # Convert the notebook to a Python script and set the script path self.script_path = Path( self.__convert_to_python( notebook_path, @@ -50,8 +48,16 @@ def _initialize_script(self, notebook_path: Path, output_path: Path) -> None: f"{export_filename}.py", ) ).resolve() + # Generated python script name self.script_name = self.script_path.name.split(".")[0].strip() + # Comment out flow.run() to prevent the flow from starting execution + # automatically when the script is imported. 
+ self.__comment_flow_execution() + + # Change the runtime backend from 'ray' to 'single_process' + self.__change_runtime() + def __get_exp_name(self, notebook_path: Path) -> None: """Fetch the experiment name from the Jupyter notebook. Args: @@ -85,6 +91,29 @@ def __convert_to_python(self, notebook_path: Path, output_path: Path, export_fil return Path(output_path).joinpath(export_filename).resolve() + def __comment_flow_execution(self) -> None: + """Comment out lines containing '.run()' in the specified Python script""" + with open(self.script_path, "r") as f: + data = f.readlines() + for idx, line in enumerate(data): + if ".run()" in line: + data[idx] = f"# {line}" + with open(self.script_path, "w") as f: + f.writelines(data) + + def __change_runtime(self) -> None: + """Change the LocalRuntime backend from ray to single_process.""" + with open(self.script_path, "r") as f: + data = f.read() + + if "backend='ray'" in data or 'backend="ray"' in data: + data = data.replace("backend='ray'", "backend='single_process'").replace( + 'backend="ray"', 'backend="single_process"' + ) + + with open(self.script_path, "w") as f: + f.write(data) + def __import_exported_script(self) -> None: """ Imports the generated python script using the importlib module @@ -283,8 +312,8 @@ def get_flow_class_details(self, parent_class) -> Dict[str, Any]: "init_args": init_args, } - def analyze_flow_configuration(self, flow_details: Dict[str, Any]) -> Dict[str, Any]: - """Analyze flow configuration from flow details. + def fetch_flow_configuration(self, flow_details: Dict[str, Any]) -> Dict[str, Any]: + """Get flow configuration from flow details. Args: flow_details (Dict[str, Any]): Dictionary containing flow class details. 
@@ -324,7 +353,7 @@ def update_dictionary(args: dict, dtype: str = "args") -> None: return flow_config - def get_runtime_info(self, flow_class_name: str) -> Tuple[object, str]: + def get_flow_runtime_info(self, flow_class_name: str) -> Tuple[object, str]: """Get federated flow class and runtime information. Args: flow_class_name (str): The name of the federated flow class to retrieve. @@ -336,10 +365,10 @@ def get_runtime_info(self, flow_class_name: str) -> Tuple[object, str]: self.__import_exported_script() federated_flow_class = getattr(self.exported_script_module, flow_class_name) - flow_name, runtime = self._find_runtime_instance(federated_flow_class) - return runtime, flow_name + flow_instance_name, runtime = self._find_flow_instance_runtime(federated_flow_class) + return runtime, flow_instance_name - def _find_runtime_instance(self, federated_flow_class) -> Tuple[str, object]: + def _find_flow_instance_runtime(self, federated_flow_class) -> Tuple[str, object]: """Find runtime instance Args: federated_flow_class: The class object of the federated flow. @@ -351,21 +380,21 @@ def _find_runtime_instance(self, federated_flow_class) -> Tuple[str, object]: tempstring = t t = getattr(self.exported_script_module, t) if isinstance(t, federated_flow_class): - flow_name = tempstring + flow_instance_name = tempstring if not hasattr(t, "_runtime"): raise AttributeError("Unable to locate LocalRuntime instantiation") runtime = t._runtime if not hasattr(runtime, "collaborators"): raise AttributeError("LocalRuntime instance does not have collaborators") - return flow_name, runtime + return flow_instance_name, runtime raise AttributeError("Runtime instance not found") - def process_aggregator(self, runtime, data, flow_name, runtime_name) -> bool: + def process_aggregator(self, runtime, data, flow_instance_name, runtime_name) -> bool: """Process the aggregator details. Args: runtime (Any): The runtime instance containing the aggregator. 
data (Dict[str, Any]): The data dictionary to be updated with aggregator details. - flow_name (str): The name of the flow. + flow_instance_name (str): The name of the flow instance. runtime_name (str): The name of the runtime. Returns: @@ -397,7 +426,7 @@ def process_aggregator(self, runtime, data, flow_name, runtime_name) -> bool: elif aggregator_private_attributes: runtime_created = True with open(self.script_path, "a") as f: - f.write(f"\n{runtime_name} = {flow_name}._runtime\n") + f.write(f"\n{runtime_name} = {flow_instance_name}._runtime\n") f.write( f"\naggregator_private_attributes = " f"{runtime_name}._aggregator.private_attributes\n" @@ -408,13 +437,13 @@ def process_aggregator(self, runtime, data, flow_name, runtime_name) -> bool: return runtime_created def process_collaborators( - self, runtime, data, flow_name, runtime_created, runtime_name + self, runtime, data, flow_instance_name, runtime_created, runtime_name ) -> Dict[str, Any]: """Process the collaborators. Args: runtime (Any): The runtime instance containing the collaborators. data (Dict[str, Any]): The data dictionary to be updated with collaborator details. - flow_name (str): The name of the flow. + flow_instance_name (str): The name of the flow instance. runtime_created (bool): Flag indicating if the runtime has been created. runtime_name (str): The name of the runtime. 
@@ -449,7 +478,7 @@ def process_collaborators( elif private_attributes: with open(self.script_path, "a") as f: if not runtime_created: - f.write(f"\n{runtime_name} = {flow_name}._runtime\n") + f.write(f"\n{runtime_name} = {flow_instance_name}._runtime\n") runtime_created = True if not runtime_collab_created: f.write( @@ -463,5 +492,3 @@ def process_collaborators( data[collab_name] = { "private_attributes": f"src.{self.script_name}.{collab_name}_private_attributes" } - - return data diff --git a/openfl/experimental/workflow/notebooktools/notebook_tools.py b/openfl/experimental/workflow/notebooktools/notebook_tools.py index d2480ad88c..f06240b8cd 100644 --- a/openfl/experimental/workflow/notebooktools/notebook_tools.py +++ b/openfl/experimental/workflow/notebooktools/notebook_tools.py @@ -1,4 +1,4 @@ -# Copyright 2020-2024 Intel Corporation +# Copyright 2020-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """Notebook Tools module.""" @@ -10,8 +10,6 @@ from shutil import copytree from typing import Tuple -import yaml - from openfl.experimental.workflow.federated.plan import Plan from openfl.experimental.workflow.interface.cli.cli_helper import print_tree from openfl.experimental.workflow.notebooktools.code_analyzer import CodeAnalyzer @@ -20,8 +18,8 @@ class NotebookTools: - """The class is responsible for converting workflow API - into an OpenFL workspace + """Class to convert LocalRuntime Jupyter notebook based on Workflow API into a + workspace that could be deployed on distributed infrastructure Attributes: notebook_path: Absolute path of jupyter notebook. 
@@ -67,9 +65,7 @@ def __init__(self, notebook_path: str, output_workspace: str) -> None: logger.info(f"Copied template workspace to {self.output_workspace_path}") # Initialize CodeAnalyzer object - self.code_analyzer = CodeAnalyzer() - # Initialize the script with in the CodeAnalyzer - self.code_analyzer._initialize_script(self.notebook_path, self.output_workspace_path) + self.code_analyzer = CodeAnalyzer(self.notebook_path, self.output_workspace_path) @classmethod def export_federated( @@ -92,6 +88,7 @@ def export_federated( instance.generate_plan_yaml(director_fqdn, tls) instance._clean_generated_workspace() print_tree(output_workspace, level=2) + return instance.generate_experiment_archive() @classmethod def export(cls, notebook_path: str, output_workspace: str) -> None: @@ -106,9 +103,27 @@ def export(cls, notebook_path: str, output_workspace: str) -> None: instance.generate_data_yaml() print_tree(output_workspace, level=2) + def generate_experiment_archive(self) -> Tuple[str, str]: + """ + Create archive of the generated workspace + + Returns: + Tuple[str, str]: A tuple containing: + (archive_path, flow_class_name). + """ + parent_directory = self.output_workspace_path.parent + archive_path = parent_directory / "experiment" + + # Create a ZIP archive of the generated_workspace directory + arch_path = shutil.make_archive(str(archive_path), "zip", str(self.output_workspace_path)) + + print(f"Archive created at {archive_path}.zip") + + return arch_path, self.flow_class_name + def generate_requirements(self) -> None: - """Extracts pip libraries mentioned in exported python script and append - in workspace/requirements.txt. 
+ """Extracts pip libraries from exported python script + and append in workspace/requirements.txt """ requirements, line_numbers, data = self.code_analyzer.get_requirements() @@ -137,43 +152,27 @@ def _clean_generated_workspace(self) -> None: if data_file.exists(): data_file.unlink() - def __read_yaml(self, path) -> dict: - """Reads a YAML file and returns its contents. - Args: - path (str): The path to the YAML file. - - Returns: - dict: The contents of the YAML file. - """ - with open(path, "r") as y: - return yaml.safe_load(y) - - def __write_yaml(self, path, data) -> None: - """Writes data to a YAML file. - Args: - path (str): The path to the YAML file. - data (dict): The data to write to the YAML file. - """ - with open(path, "w") as y: - yaml.safe_dump(data, y) - def generate_plan_yaml(self, director_fqdn: str = None, tls: bool = False) -> None: - """Generate the plan.yaml file containing the federated learning flow configuration + """Generate the plan.yaml Args: director_fqdn (str): Fully qualified domain name of the director node. tls (bool, optional): Whether to use TLS for the connection. 
""" - flspec = import_module("openfl.experimental.workflow.interface").FLSpec + # Get the flow_class details - flow_details = self.code_analyzer.get_flow_class_details(flspec) - # Analyze and generate plan configuration - flow_config = self.code_analyzer.analyze_flow_configuration(flow_details) + flow_details = self._extract_flow_details() + + # Get flow_class_name + self.flow_class_name = flow_details["flow_class_name"] + + # Get flow configuration + flow_config = self.code_analyzer.fetch_flow_configuration(flow_details) # Determine the path for the plan.yaml file plan = self.output_workspace_path.joinpath("plan", "plan.yaml").resolve() - ## Read or initialize the YAML data - data = self._read_or_initialize_plan_yaml(plan) + # Initialize the YAML data + data = self._initialize_plan_yaml(plan) # Update the plan_configuration with the analyzed flow configuration data["federated_flow"].update(flow_config["federated_flow"]) @@ -186,68 +185,68 @@ def generate_plan_yaml(self, director_fqdn: str = None, tls: bool = False) -> No data["network"]["settings"]["tls"] = tls # Write the updated plan configuraiton to the plan.yaml file - self.__write_yaml(plan, data) + Plan.dump(plan, data) def generate_data_yaml(self) -> None: - """Generate data.yaml with runtime configuration""" - # Ensure flow_class is available - flow_class_name = self._ensure_flow_class() + """Generate data.yaml""" + + # Get flow class_name + if not hasattr(self, "flow_class_name"): + flow_details = self._extract_flow_details() + self.flow_class_name = flow_details["flow_class_name"] # Get runtime information using CodeAnalyzer - runtime, flow_name = self.code_analyzer.get_runtime_info(flow_class_name) + runtime, flow_instance_name = self.code_analyzer.get_flow_runtime_info(self.flow_class_name) # Determine the path for the data.yaml data_yaml = self.output_workspace_path.joinpath("plan", "data.yaml").resolve() - # Read or initialize the YAML data - data = self._read_or_initialize_data_yaml(data_yaml) + # 
Initialize the YAML data + data = self._initialize_data_yaml(data_yaml) # Initiaize runtime name runtime_name = "local_runtime" # Process aggregator information using CodeAnalyzer runtime_created = self.code_analyzer.process_aggregator( - runtime, data, flow_name, runtime_name + runtime, data, flow_instance_name, runtime_name ) # Process collaborator information using CodeAnalyzer - data = self.code_analyzer.process_collaborators( - runtime, data, flow_name, runtime_created, runtime_name + self.code_analyzer.process_collaborators( + runtime, data, flow_instance_name, runtime_created, runtime_name ) # Write updated data configuration to the data.yaml file - self.__write_yaml(data_yaml, data) - - def _ensure_flow_class(self) -> str: - """Ensure flow class is available and returns its name""" - if not hasattr(self, "flow_class_name"): - flspsec = import_module("openfl.experimental.workflow.interface").FLSpec - flow_details = self.code_analyzer.get_flow_class_details(flspsec) - self.flow_class_name = flow_details["flow_class_name"] + Plan.dump(data_yaml, data) - return self.flow_class_name + def _extract_flow_details(self) -> str: + """Extract the flow class details""" + flspsec = import_module("openfl.experimental.workflow.interface").FLSpec + flow_details = self.code_analyzer.get_flow_class_details(flspsec) + return flow_details - def _read_or_initialize_plan_yaml(self, plan_yaml) -> dict: - """Read or initialize the plan YAML data. + def _initialize_plan_yaml(self, plan_yaml) -> dict: + """Load or initialize the plan YAML data. Args: plan_yaml (Path): The path to the plan.yaml file. Returns: dict: The data dictionary from plan.yaml. """ - data = self.__read_yaml(plan_yaml) + data = Plan.load(plan_yaml) if data is None: data = {} data["federated_flow"] = {"settings": {}, "template": ""} return data - def _read_or_initialize_data_yaml(self, data_yaml) -> dict: - """Read or initialize the YAML data. 
+ def _initialize_data_yaml(self, data_yaml) -> dict: + """Load or initialize the YAML data. Args: data_yaml (Path): The path to the data.yaml file. Returns: dict: The data dictionary from data.yaml """ - data = self.__read_yaml(data_yaml) + data = Plan.load(data_yaml) return data if data is not None else {} From 72874ad5d0a32cd350160a7e32907c6ef9d36e46 Mon Sep 17 00:00:00 2001 From: refai06 Date: Fri, 14 Feb 2025 09:38:19 +0530 Subject: [PATCH 3/7] Improvements added Signed-off-by: refai06 --- .../workflow/notebooktools/code_analyzer.py | 31 ++++++++++++++++--- .../workflow/notebooktools/notebook_tools.py | 22 +++++++------ 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/openfl/experimental/workflow/notebooktools/code_analyzer.py b/openfl/experimental/workflow/notebooktools/code_analyzer.py index 1c0a086c45..2a065c52fc 100644 --- a/openfl/experimental/workflow/notebooktools/code_analyzer.py +++ b/openfl/experimental/workflow/notebooktools/code_analyzer.py @@ -58,7 +58,7 @@ def __init__(self, notebook_path: Path, output_path: Path) -> None: # Change the runtime backend from 'ray' to 'single_process' self.__change_runtime() - def __get_exp_name(self, notebook_path: Path) -> None: + def __get_exp_name(self, notebook_path: Path) -> str: """Fetch the experiment name from the Jupyter notebook. Args: notebook_path (str): Path to Jupyter notebook. @@ -73,7 +73,10 @@ def __get_exp_name(self, notebook_path: Path) -> None: if match: logger.info(f"Retrieved {match.group(1)} from default_exp") return match.group(1) - return None + raise ValueError( + "The notebook does not contain a '#| default_exp Path: """Converts a Jupyter notebook to a Python script. @@ -216,7 +219,13 @@ def __extract_class_initializing_args(self, class_name) -> Dict[str, Any]: return instantiation_args def _extract_positional_args(self, args) -> Dict[str, Any]: - """Extract positional arguments from the AST nodes.""" + """Extract positional arguments from the AST nodes. 
+ Args: + args: AST nodes representing the arguments. + + Returns: + Dict[str, Any]: Dictionary of argument names and their values. + """ positional_args = {} for arg in args: if isinstance(arg, ast.Name): @@ -228,7 +237,13 @@ def _extract_positional_args(self, args) -> Dict[str, Any]: return positional_args def _extract_keyword_args(self, keywords) -> Dict[str, Any]: - """Extract keyword arguments from the AST nodes.""" + """Extract keyword arguments from the AST nodes. + Args: + keywords: AST nodes representing the keyword arguments. + + Returns: + Dict[str, Any]: Dictionary of keyword argument names and their values. + """ keyword_args = {} for kwarg in keywords: value = ast.unparse(kwarg.value).strip() @@ -241,7 +256,13 @@ def _extract_keyword_args(self, keywords) -> Dict[str, Any]: return keyword_args def _clean_value(self, value: str) -> str: - """Clean the value by removing unnecessary parentheses or brackets.""" + """Clean the value by removing unnecessary parentheses or brackets. + Args: + value (str): The string value to be cleaned. + + Returns: + str: The cleaned string value + """ if value.startswith("(") and "," not in value: value = value.lstrip("(").rstrip(")") if value.startswith("[") and "," not in value: diff --git a/openfl/experimental/workflow/notebooktools/notebook_tools.py b/openfl/experimental/workflow/notebooktools/notebook_tools.py index f06240b8cd..63d26ddeeb 100644 --- a/openfl/experimental/workflow/notebooktools/notebook_tools.py +++ b/openfl/experimental/workflow/notebooktools/notebook_tools.py @@ -84,11 +84,11 @@ def export_federated( (archive_path, flow_class_name). 
""" instance = cls(notebook_path, output_workspace) - instance.generate_requirements() - instance.generate_plan_yaml(director_fqdn, tls) + instance._generate_requirements() + instance._generate_plan_yaml(director_fqdn, tls) instance._clean_generated_workspace() print_tree(output_workspace, level=2) - return instance.generate_experiment_archive() + return instance._generate_experiment_archive() @classmethod def export(cls, notebook_path: str, output_workspace: str) -> None: @@ -98,12 +98,12 @@ def export(cls, notebook_path: str, output_workspace: str) -> None: output_workspace (str): Path for the generated workspace directory. """ instance = cls(notebook_path, output_workspace) - instance.generate_requirements() - instance.generate_plan_yaml() - instance.generate_data_yaml() + instance._generate_requirements() + instance._generate_plan_yaml() + instance._generate_data_yaml() print_tree(output_workspace, level=2) - def generate_experiment_archive(self) -> Tuple[str, str]: + def _generate_experiment_archive(self) -> Tuple[str, str]: """ Create archive of the generated workspace @@ -121,7 +121,7 @@ def generate_experiment_archive(self) -> Tuple[str, str]: return arch_path, self.flow_class_name - def generate_requirements(self) -> None: + def _generate_requirements(self) -> None: """Extracts pip libraries from exported python script and append in workspace/requirements.txt """ @@ -152,7 +152,7 @@ def _clean_generated_workspace(self) -> None: if data_file.exists(): data_file.unlink() - def generate_plan_yaml(self, director_fqdn: str = None, tls: bool = False) -> None: + def _generate_plan_yaml(self, director_fqdn: str = None, tls: bool = False) -> None: """Generate the plan.yaml Args: director_fqdn (str): Fully qualified domain name of the director node. 
@@ -187,7 +187,7 @@ def generate_plan_yaml(self, director_fqdn: str = None, tls: bool = False) -> No # Write the updated plan configuraiton to the plan.yaml file Plan.dump(plan, data) - def generate_data_yaml(self) -> None: + def _generate_data_yaml(self) -> None: """Generate data.yaml""" # Get flow class_name @@ -224,6 +224,8 @@ def _extract_flow_details(self) -> str: """Extract the flow class details""" flspsec = import_module("openfl.experimental.workflow.interface").FLSpec flow_details = self.code_analyzer.get_flow_class_details(flspsec) + if not flow_details: + raise ValueError("Failed to extract flow class details") return flow_details def _initialize_plan_yaml(self, plan_yaml) -> dict: From ffde95b1c894ec5002c11a60d27bf56dc2647adf Mon Sep 17 00:00:00 2001 From: refai06 Date: Sat, 15 Feb 2025 00:50:38 +0530 Subject: [PATCH 4/7] Added testcase Signed-off-by: refai06 --- .../workflow/notebooktools/notebook_tools.py | 2 +- .../301_MNIST_Watermarking.ipynb | 924 ++++++++++++++++++ .../test_artifacts/actual/.workspace | 2 + .../test_artifacts/actual/plan/cols.yaml | 5 + .../test_artifacts/actual/plan/data.yaml | 51 + .../test_artifacts/actual/plan/defaults | 2 + .../test_artifacts/actual/plan/plan.yaml | 20 + .../test_artifacts/actual/requirements.txt | 6 + .../test_artifacts/actual/src/__init__.py | 2 + .../test_artifacts/actual/src/experiment.py | 664 +++++++++++++ .../test_artifacts/expected/.workspace | 2 + .../test_artifacts/expected/plan/cols.yaml | 5 + .../test_artifacts/expected/plan/data.yaml | 51 + .../test_artifacts/expected/plan/defaults | 2 + .../test_artifacts/expected/plan/plan.yaml | 20 + .../test_artifacts/expected/requirements.txt | 6 + .../test_artifacts/expected/src/__init__.py | 2 + .../test_artifacts/expected/src/experiment.py | 664 +++++++++++++ .../testcase_export/test_script.py | 73 ++ .../MNIST_Watermarking.ipynb | 587 +++++++++++ .../test_artifacts/actual/.workspace | 2 + .../test_artifacts/actual/plan/defaults | 2 + 
.../test_artifacts/actual/plan/plan.yaml | 25 + .../test_artifacts/actual/requirements.txt | 7 + .../test_artifacts/actual/src/__init__.py | 2 + .../test_artifacts/actual/src/experiment.py | 380 +++++++ .../test_artifacts/expected/.workspace | 2 + .../test_artifacts/expected/plan/defaults | 2 + .../test_artifacts/expected/plan/plan.yaml | 25 + .../test_artifacts/expected/requirements.txt | 7 + .../test_artifacts/expected/src/__init__.py | 2 + .../test_artifacts/expected/src/experiment.py | 380 +++++++ .../testcase_export_federated/test_script.py | 95 ++ 33 files changed, 4020 insertions(+), 1 deletion(-) create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/301_MNIST_Watermarking.ipynb create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/.workspace create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/cols.yaml create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/data.yaml create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/defaults create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/plan.yaml create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/requirements.txt create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/__init__.py create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/experiment.py create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/.workspace create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/cols.yaml create mode 100644 
tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/defaults create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/plan.yaml create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/requirements.txt create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/__init__.py create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_script.py create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/MNIST_Watermarking.ipynb create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/.workspace create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/defaults create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/plan.yaml create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/requirements.txt create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/__init__.py create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/experiment.py create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/.workspace create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/defaults create 
mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/plan.yaml create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/requirements.txt create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/__init__.py create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/experiment.py create mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py diff --git a/openfl/experimental/workflow/notebooktools/notebook_tools.py b/openfl/experimental/workflow/notebooktools/notebook_tools.py index 63d26ddeeb..f75cb2a6cf 100644 --- a/openfl/experimental/workflow/notebooktools/notebook_tools.py +++ b/openfl/experimental/workflow/notebooktools/notebook_tools.py @@ -205,7 +205,7 @@ def _generate_data_yaml(self) -> None: data = self._initialize_data_yaml(data_yaml) # Initiaize runtime name - runtime_name = "local_runtime" + runtime_name = "runtime_local" # Process aggregator information using CodeAnalyzer runtime_created = self.code_analyzer.process_aggregator( diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/301_MNIST_Watermarking.ipynb b/tests/github/experimental/workflow/NotebookTools/testcase_export/301_MNIST_Watermarking.ipynb new file mode 100644 index 0000000000..dcd327ed1a --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/301_MNIST_Watermarking.ipynb @@ -0,0 +1,924 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# Workflow Interface 301: Watermarking\n", + "\n", + "[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/intel/openfl/blob/develop/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8f28c451", + "metadata": {}, + "source": [ + "This OpenFL Workflow Interface tutorial demonstrates Watermarking of DL Model in Federated Learning. Watermarking enables the Model owner to assert ownership rights and detect stolen model instances. \n", + "\n", + "In this tutorial we use Backdooring to embed Watermark on a DL model trained on MNIST Dataset. This involves training the DL model with both the actual training data and the backdoor (a.k.a Watermark dataset). Watermark dataset is designed by the Model owner and consists of mislabelled input and output data pairs. Watermarked model performs normally on the Target dataset but returns incorrect labels on the Watermark dataset. Watermark dataset needs to be hidden from the Collaborators and Watermarking embedding needs to be performed at a trusted entity (Aggregator in this case)\n", + "\n", + "This workflow demonstrates: \n", + "- Flexibility to define the Watermark embedding steps as Aggregator processing steps without any involvement of Collaborators\n", + "- Ability to define Watermark dataset as a private attribute of Aggregator entity\n", + "- Flexibility to select a subset of collaborators on which Model Training is performed every training round\n", + "- Visualize the Workflow as a Graph\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a4394089", + "metadata": {}, + "source": [ + "# Getting Started" + ] + }, + { + "cell_type": "markdown", + "id": "ff167e44", + "metadata": {}, + "source": [ + "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. 
\n", + "\n", + "In the following cell, `#| default_exp experiment` indicates that the exported file will be named 'experiment'. This name can be modified based on the user's requirements and preferences" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e9a73bd", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "markdown", + "id": "e69cdbeb", + "metadata": {}, + "source": [ + "Once we have specified the name of the module, subsequent cells of the notebook need to be *annotated* with the `#| export` directive as shown below. User should ensure that *all* the notebook functionality required in the Federated Learning experiment is included in cells marked with this directive" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "857f9995", + "metadata": {}, + "source": [ + "First we start by installing the necessary dependencies for the workflow interface" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7475cba", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "# !pip install git+https://github.com/securefederatedai/openfl.git\n", + "!pip install -r workflow_interface_requirements.txt\n", + "!pip install torch\n", + "!pip install torchvision\n", + "!pip install matplotlib\n", + "!pip install git+https://github.com/pyviz-topics/imagen.git@master\n", + "!pip install holoviews==1.15.4\n", + "\n", + "\n", + "# Uncomment this if running in Google Colab\n", + "#!pip install -r https://raw.githubusercontent.com/intel/openfl/develop/openfl-tutorials/experimental/workflow/workflow_interface_requirements.txt\n", + "#import os\n", + "#os.environ[\"USERNAME\"] = \"colab\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "7bd566df", + "metadata": {}, + "source": [ + "We begin with the quintessential example of a pytorch CNN model trained on the MNIST dataset. 
Let's start by defining our dataloaders, model, optimizer, and some helper functions like we would for any other deep learning experiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9bd8ac2d", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "import torch\n", + "import torchvision\n", + "import numpy as np\n", + "import random\n", + "import pathlib\n", + "import os\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import PIL.Image as Image\n", + "import imagen as ig\n", + "import numbergen as ng\n", + "\n", + "random_seed = 1\n", + "torch.backends.cudnn.enabled = False\n", + "torch.manual_seed(random_seed)\n", + "\n", + "# MNIST Train and Test datasets\n", + "mnist_train = torchvision.datasets.MNIST(\n", + " \"./files/\",\n", + " train=True,\n", + " download=True,\n", + " transform=torchvision.transforms.Compose(\n", + " [\n", + " torchvision.transforms.ToTensor(),\n", + " torchvision.transforms.Normalize((0.1307,), (0.3081,)),\n", + " ]\n", + " ),\n", + ")\n", + "\n", + "mnist_test = torchvision.datasets.MNIST(\n", + " \"./files/\",\n", + " train=False,\n", + " download=True,\n", + " transform=torchvision.transforms.Compose(\n", + " [\n", + " torchvision.transforms.ToTensor(),\n", + " torchvision.transforms.Normalize((0.1307,), (0.3081,)),\n", + " ]\n", + " ),\n", + ")\n", + "\n", + "\n", + "class Net(nn.Module):\n", + " def __init__(self, dropout=0.0):\n", + " super(Net, self).__init__()\n", + " self.dropout = dropout\n", + " self.block = nn.Sequential(\n", + " nn.Conv2d(1, 32, 2),\n", + " nn.MaxPool2d(2),\n", + " nn.ReLU(),\n", + " nn.Conv2d(32, 64, 2),\n", + " nn.MaxPool2d(2),\n", + " nn.ReLU(),\n", + " nn.Conv2d(64, 128, 2),\n", + " nn.ReLU(),\n", + " )\n", + " self.fc1 = nn.Linear(128 * 5**2, 200)\n", + " self.fc2 = nn.Linear(200, 10)\n", + " self.relu = nn.ReLU()\n", + " 
self.dropout = nn.Dropout(p=dropout)\n", + "\n", + " def forward(self, x):\n", + " x = self.dropout(x)\n", + " out = self.block(x)\n", + " out = out.view(-1, 128 * 5**2)\n", + " out = self.dropout(out)\n", + " out = self.relu(self.fc1(out))\n", + " out = self.dropout(out)\n", + " out = self.fc2(out)\n", + " return F.log_softmax(out, 1)\n", + "\n", + "\n", + "def inference(network, test_loader):\n", + " network.eval()\n", + " correct = 0\n", + " with torch.no_grad():\n", + " for data, target in test_loader:\n", + " output = network(data)\n", + " pred = output.data.max(1, keepdim=True)[1]\n", + " correct += pred.eq(target.data.view_as(pred)).sum()\n", + " accuracy = float(correct / len(test_loader.dataset))\n", + " return accuracy\n", + "\n", + "\n", + "def train_model(model, optimizer, data_loader, entity, round_number, log=False):\n", + " # Helper function to train the model\n", + " train_loss = 0\n", + " log_interval = 20\n", + " model.train()\n", + " for batch_idx, (X, y) in enumerate(data_loader):\n", + " optimizer.zero_grad()\n", + "\n", + " output = model(X)\n", + " loss = F.nll_loss(output, y)\n", + " loss.backward()\n", + "\n", + " optimizer.step()\n", + "\n", + " train_loss += loss.item() * len(X)\n", + " if batch_idx % log_interval == 0 and log:\n", + " print(\"{:<20} Train Epoch: {:<3} [{:<3}/{:<4} ({:<.0f}%)] Loss: {:<.6f}\".format(\n", + " entity,\n", + " round_number,\n", + " batch_idx * len(X),\n", + " len(data_loader.dataset),\n", + " 100.0 * batch_idx / len(data_loader),\n", + " loss.item(),\n", + " )\n", + " )\n", + " train_loss /= len(data_loader.dataset)\n", + " return train_loss" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "f0c55175", + "metadata": {}, + "source": [ + "Watermark dataset consists of mislabelled (input, output) data pairs and is designed such that the model learns to exhibit an unusual prediction behavior on data points from this dataset. 
The unusual behavior can then be used to demonstrate model ownership and identify illegitimate model copies\n", + "\n", + "Let us prepare and inspect the sample Watermark dataset consisting of 100 images = 10 classes (1 for each digit) x 10 images (per class). Watermark images were generated by superimposing a unique pattern (per class) on a noisy background (10 images / class). (Reference - WAFFLE: Watermarking in Federated Learning https://arxiv.org/abs/2008.07298)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bcad2624", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "watermark_dir = \"./files/watermark-dataset/MWAFFLE/\"\n", + "\n", + "\n", + "def generate_watermark(\n", + " x_size=28, y_size=28, num_class=10, num_samples_per_class=10, img_dir=watermark_dir\n", + "):\n", + " \"\"\"\n", + " Generate Watermark by superimposing a pattern on noisy background.\n", + "\n", + " Parameters\n", + " ----------\n", + " x_size: x dimension of the image\n", + " y_size: y dimension of the image\n", + " num_class: number of classes in the original dataset\n", + " num_samples_per_class: number of samples to be generated per class\n", + " img_dir: directory for saving watermark dataset\n", + "\n", + " Reference\n", + " ---------\n", + " WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298)\n", + "\n", + " \"\"\"\n", + " x_pattern = int(x_size * 2 / 3.0 - 1)\n", + " y_pattern = int(y_size * 2 / 3.0 - 1)\n", + "\n", + " np.random.seed(0)\n", + " for cls in range(num_class):\n", + " patterns = []\n", + " random_seed = 10 + cls\n", + " patterns.append(\n", + " ig.Line(\n", + " xdensity=x_pattern,\n", + " ydensity=y_pattern,\n", + " thickness=0.001,\n", + " orientation=np.pi * ng.UniformRandom(seed=random_seed),\n", + " x=ng.UniformRandom(seed=random_seed) - 0.5,\n", + " y=ng.UniformRandom(seed=random_seed) - 0.5,\n", + " scale=0.8,\n", + " )\n", + " )\n", + " patterns.append(\n", + " ig.Arc(\n", + " 
xdensity=x_pattern,\n", + " ydensity=y_pattern,\n", + " thickness=0.001,\n", + " orientation=np.pi * ng.UniformRandom(seed=random_seed),\n", + " x=ng.UniformRandom(seed=random_seed) - 0.5,\n", + " y=ng.UniformRandom(seed=random_seed) - 0.5,\n", + " size=0.33,\n", + " )\n", + " )\n", + "\n", + " pat = np.zeros((x_pattern, y_pattern))\n", + " for i in range(6):\n", + " j = np.random.randint(len(patterns))\n", + " pat += patterns[j]()\n", + " res = pat > 0.5\n", + " pat = res.astype(int)\n", + "\n", + " x_offset = np.random.randint(x_size - x_pattern + 1)\n", + " y_offset = np.random.randint(y_size - y_pattern + 1)\n", + "\n", + " for i in range(num_samples_per_class):\n", + " base = np.random.rand(x_size, y_size)\n", + " # base = np.zeros((x_input, y_input))\n", + " base[\n", + " x_offset : x_offset + pat.shape[0],\n", + " y_offset : y_offset + pat.shape[1],\n", + " ] += pat\n", + " d = np.ones((x_size, x_size))\n", + " img = np.minimum(base, d)\n", + " if not os.path.exists(img_dir + str(cls) + \"/\"):\n", + " os.makedirs(img_dir + str(cls) + \"/\")\n", + " plt.imsave(\n", + " img_dir + str(cls) + \"/wm_\" + str(i + 1) + \".png\",\n", + " img,\n", + " cmap=matplotlib.cm.gray,\n", + " )\n", + "\n", + "\n", + "# If the Watermark dataset does not exist, generate and save the Watermark images\n", + "watermark_path = pathlib.Path(watermark_dir)\n", + "if watermark_path.exists() and watermark_path.is_dir():\n", + " print(\n", + " f\"Watermark dataset already exists at: {watermark_path}. Proceeding to next step ... \"\n", + " )\n", + " pass\n", + "else:\n", + " print(f\"Generating Watermark dataset... 
\")\n", + " generate_watermark()\n", + "\n", + "\n", + "class WatermarkDataset(torch.utils.data.Dataset):\n", + " def __init__(self, images_dir, label_dir=None, transforms=None):\n", + " self.images_dir = os.path.abspath(images_dir)\n", + " self.image_paths = [\n", + " os.path.join(self.images_dir, d) for d in os.listdir(self.images_dir)\n", + " ]\n", + " self.label_paths = label_dir\n", + " self.transform = transforms\n", + " temp = []\n", + "\n", + " # Recursively counting total number of images in the directory\n", + " for image_path in self.image_paths:\n", + " for path in os.walk(image_path):\n", + " if len(path) <= 1:\n", + " continue\n", + " path = path[2]\n", + " for im_n in [image_path + \"/\" + p for p in path]:\n", + " temp.append(im_n)\n", + " self.image_paths = temp\n", + "\n", + " if len(self.image_paths) == 0:\n", + " raise Exception(f\"No file(s) found under {images_dir}\")\n", + "\n", + " def __len__(self):\n", + " return len(self.image_paths)\n", + "\n", + " def __getitem__(self, idx):\n", + " image_filepath = self.image_paths[idx]\n", + " image = Image.open(image_filepath)\n", + " image = image.convert(\"RGB\")\n", + " image = self.transform(image)\n", + " label = int(image_filepath.split(\"/\")[-2])\n", + "\n", + " return image, label\n", + "\n", + "\n", + "def get_watermark_transforms():\n", + " return torchvision.transforms.Compose(\n", + " [\n", + " torchvision.transforms.Grayscale(),\n", + " torchvision.transforms.Resize(28),\n", + " torchvision.transforms.ToTensor(),\n", + " torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)), # Normalize\n", + " ]\n", + " )\n", + "\n", + "\n", + "watermark_data = WatermarkDataset(\n", + " images_dir=watermark_dir,\n", + " transforms=get_watermark_transforms(),\n", + ")\n", + "\n", + "# Set display_watermark to True to display the Watermark dataset\n", + "display_watermark = True\n", + "if display_watermark:\n", + " # Inspect and plot the Watermark Images\n", + " wm_images = np.empty((100, 28, 
28))\n", + " wm_labels = np.empty([100, 1], dtype=int)\n", + "\n", + " for i in range(len(watermark_data)):\n", + " img, label = watermark_data[i]\n", + " wm_labels[label * 10 + i % 10] = label\n", + " wm_images[label * 10 + i % 10, :, :] = img.numpy()\n", + "\n", + " fig = plt.figure(figsize=(120, 120))\n", + " for i in range(100):\n", + " plt.subplot(10, 10, i + 1)\n", + " plt.imshow(wm_images[i], interpolation=\"none\")\n", + " plt.title(\"Label: {}\".format(wm_labels[i]), fontsize=80)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d82d34fd", + "metadata": {}, + "source": [ + "Next we import the `FLSpec`, `LocalRuntime`, placement decorators (`aggregator/collaborator`), and `InspectFlow`.\n", + "\n", + "- `FLSpec` – Defines the flow specification. User defined flows are subclasses of this.\n", + "- `Runtime` – Defines where the flow runs, infrastructure for task transitions (how information gets sent). The `LocalRuntime` runs the flow on a single node.\n", + "- `aggregator/collaborator` - placement decorators that define where the task will be assigned\n", + "- `InspectFlow` – Utility to visualize the User-defined workflow as a Graph (only currently compatible in flows without loops)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89cf4866", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from copy import deepcopy\n", + "\n", + "from openfl.experimental.workflow.interface import FLSpec, Aggregator, Collaborator\n", + "from openfl.experimental.workflow.runtime import LocalRuntime\n", + "from openfl.experimental.workflow.placement import aggregator, collaborator\n", + "from openfl.experimental.workflow.utilities.ui import InspectFlow\n", + "\n", + "\n", + "def FedAvg(agg_model, models, weights=None):\n", + " state_dicts = [model.state_dict() for model in models]\n", + " state_dict = agg_model.state_dict()\n", + " for key in models[0].state_dict():\n", + " state_dict[key] = 
torch.from_numpy(np.average([state[key].numpy() for state in state_dicts],\n", + " axis=0, \n", + " weights=weights))\n", + " \n", + " agg_model.load_state_dict(state_dict)\n", + " return agg_model" + ] + }, + { + "attachments": { + "image.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "id": "c917b085", + "metadata": {}, + "source": [ + "Let us now define the Workflow for Watermark embedding. Here we use the same tasks as the [quickstart](https://github.com/securefederatedai/openfl/blob/develop/openfl-tutorials/experimental/workflow/101_MNIST.ipynb), and define the following additional steps for Watermarking\n", + "- PRE-TRAIN (watermark_pretrain): At the start (once), initial model is trained on Watermark dataset for a specified number of epochs \n", + "- RE-TRAIN (watermark_retrain): Every training round, Aggregated model is retrained on Watermark dataset until a desired acc threshold is reached or the maximum number of retrain rounds is reached\n", + "\n", + "Notice that both the PRE-TRAIN and RE-TRAIN tasks are defined as Aggregator processing tasks\n", + "\n", + "![image.png](attachment:image.png)\n", + "\n", + "
Workflow for Watermarking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c4a752", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class FederatedFlow_MNIST_Watermarking(FLSpec):\n", + " \"\"\"\n", + " This Flow demonstrates Watermarking on a Deep Learning Model in Federated Learning\n", + " Ref: WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298)\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " model=None,\n", + " optimizer=None,\n", + " watermark_pretrain_optimizer=None,\n", + " watermark_retrain_optimizer=None,\n", + " round_number=0,\n", + " **kwargs,\n", + " ):\n", + " super().__init__(**kwargs)\n", + "\n", + " if model is not None:\n", + " self.model = model\n", + " self.optimizer = optimizer\n", + " self.watermark_pretrain_optimizer = watermark_pretrain_optimizer\n", + " self.watermark_retrain_optimizer = watermark_retrain_optimizer\n", + " else:\n", + " self.model = Net()\n", + " self.optimizer = optim.SGD(\n", + " self.model.parameters(), lr=learning_rate, momentum=momentum\n", + " )\n", + " self.watermark_pretrain_optimizer = optim.SGD(\n", + " self.model.parameters(),\n", + " lr=watermark_pretrain_learning_rate,\n", + " momentum=watermark_pretrain_momentum,\n", + " weight_decay=watermark_pretrain_weight_decay,\n", + " )\n", + " self.watermark_retrain_optimizer = optim.SGD(\n", + " self.model.parameters(), lr=watermark_retrain_learning_rate\n", + " )\n", + " self.round_number = round_number\n", + " self.watermark_pretraining_completed = False\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " \"\"\"\n", + " This is the start of the Flow.\n", + " \"\"\"\n", + "\n", + " print(f\": Start of flow ... 
\")\n", + " self.collaborators = self.runtime.collaborators\n", + "\n", + " # Randomly select a fraction of actual collaborator every round\n", + " fraction = 0.5\n", + " if int(fraction * len(self.collaborators)) < 1:\n", + " raise Exception(\n", + " f\"Cannot run training with {fraction*100}% selected collaborators out of {len(self.collaborators)} Collaborators. Atleast one collaborator is required to run the training\"\n", + " )\n", + " self.subset_collaborators = random.sample(\n", + " self.collaborators, int(fraction * (len(self.collaborators)))\n", + " )\n", + "\n", + " self.next(self.watermark_pretrain)\n", + "\n", + " @aggregator\n", + " def watermark_pretrain(self):\n", + " \"\"\"\n", + " Pre-Train the Model before starting Federated Learning.\n", + " \"\"\"\n", + " if not self.watermark_pretraining_completed:\n", + "\n", + " print(\": Performing Watermark Pre-training\")\n", + "\n", + " for i in range(self.pretrain_epochs):\n", + "\n", + " watermark_pretrain_loss = train_model(\n", + " self.model,\n", + " self.watermark_pretrain_optimizer,\n", + " self.watermark_data_loader,\n", + " \":\",\n", + " i,\n", + " log=False,\n", + " )\n", + " watermark_pretrain_validation_score = inference(\n", + " self.model, self.watermark_data_loader\n", + " )\n", + "\n", + " print(\n", + " \": Watermark Pretraining: Round: {:<3} Loss: {:<.6f} Acc: {:<.6f}\".format(\n", + " i,\n", + " watermark_pretrain_loss,\n", + " watermark_pretrain_validation_score,\n", + " )\n", + " )\n", + "\n", + " self.watermark_pretraining_completed = True\n", + "\n", + " self.next(\n", + " self.aggregated_model_validation,\n", + " foreach=\"subset_collaborators\",\n", + " exclude=[\"watermark_pretrain_optimizer\", \"watermark_retrain_optimizer\"],\n", + " )\n", + "\n", + " @collaborator\n", + " def aggregated_model_validation(self):\n", + " \"\"\"\n", + " Perform Aggregated Model validation on Collaborators.\n", + " \"\"\"\n", + " self.agg_validation_score = inference(self.model, 
self.test_loader)\n", + " print(\n", + " f\" Aggregated Model validation score = {self.agg_validation_score}\"\n", + " )\n", + "\n", + " self.next(self.train)\n", + "\n", + " @collaborator\n", + " def train(self):\n", + " \"\"\"\n", + " Train model on Local collab dataset.\n", + "\n", + " \"\"\"\n", + " print(\": Performing Model Training on Local dataset ... \")\n", + "\n", + " self.optimizer = optim.SGD(\n", + " self.model.parameters(), lr=learning_rate, momentum=momentum\n", + " )\n", + "\n", + " self.loss = train_model(\n", + " self.model,\n", + " self.optimizer,\n", + " self.train_loader,\n", + " \"\"),\n", + " self.round_number if self.round_number is not None else 0,\n", + " log=True,\n", + " )\n", + "\n", + " self.next(self.local_model_validation)\n", + "\n", + " @collaborator\n", + " def local_model_validation(self):\n", + " \"\"\"\n", + " Validate locally trained model.\n", + "\n", + " \"\"\"\n", + " self.local_validation_score = inference(self.model, self.test_loader)\n", + " print(\n", + " f\" Local model validation score = {self.local_validation_score}\"\n", + " )\n", + " self.next(self.join)\n", + "\n", + " @aggregator\n", + " def join(self, inputs):\n", + " \"\"\"\n", + " Model aggregation step.\n", + " \"\"\"\n", + "\n", + " self.average_loss = sum(input.loss for input in inputs) / len(inputs)\n", + " self.aggregated_model_accuracy = sum(\n", + " input.agg_validation_score for input in inputs\n", + " ) / len(inputs)\n", + " self.local_model_accuracy = sum(\n", + " input.local_validation_score for input in inputs\n", + " ) / len(inputs)\n", + "\n", + " print(f\": Joining models from collaborators...\")\n", + "\n", + " print(\n", + " f\" Aggregated model validation score = {self.aggregated_model_accuracy}\"\n", + " )\n", + " print(f\" Average training loss = {self.average_loss}\")\n", + " print(f\" Average local model validation values = {self.local_model_accuracy}\")\n", + "\n", + " self.model = FedAvg(self.model, [input.model for input in 
inputs])\n", + "\n", + " self.next(self.watermark_retrain)\n", + "\n", + " @aggregator\n", + " def watermark_retrain(self):\n", + " \"\"\"\n", + " Retrain the aggregated model.\n", + "\n", + " \"\"\"\n", + " print(\": Performing Watermark Retraining ... \")\n", + " self.watermark_retrain_optimizer = optim.SGD(\n", + " self.model.parameters(), lr=watermark_retrain_learning_rate\n", + " )\n", + "\n", + " retrain_round = 0\n", + "\n", + " # Perform re-training until (accuracy >= acc_threshold) or (retrain_round > number of retrain_epochs)\n", + " self.watermark_retrain_validation_score = inference(\n", + " self.model, self.watermark_data_loader\n", + " )\n", + " while (\n", + " self.watermark_retrain_validation_score < self.watermark_acc_threshold\n", + " ) and (retrain_round < self.retrain_epochs):\n", + " self.watermark_retrain_train_loss = train_model(\n", + " self.model,\n", + " self.watermark_retrain_optimizer,\n", + " self.watermark_data_loader,\n", + " \"\",\n", + " retrain_round,\n", + " log=False,\n", + " )\n", + " self.watermark_retrain_validation_score = inference(\n", + " self.model, self.watermark_data_loader\n", + " )\n", + "\n", + " print(\n", + " \": Watermark Retraining: Train Epoch: {:<3} Retrain Round: {:<3} Loss: {:<.6f}, Acc: {:<.6f}\".format(\n", + " self.round_number,\n", + " retrain_round,\n", + " self.watermark_retrain_train_loss,\n", + " self.watermark_retrain_validation_score,\n", + " )\n", + " )\n", + "\n", + " retrain_round += 1\n", + "\n", + " self.next(self.end)\n", + "\n", + " @aggregator\n", + " def end(self):\n", + " \"\"\"\n", + " This is the last step in the Flow.\n", + "\n", + " \"\"\"\n", + " print(f\"This is the end of the flow\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "c6da2c42", + "metadata": {}, + "source": [ + "In the `FederatedFlow_MNIST_Watermarking` definition above, you will notice that certain attributes of the flow were not initialized, namely the `watermark_data_loader` for Aggregator and 
`train_loader`, `test_loader` for the Collaborators. \n", + "\n", + "- Collaborator attributes are created in the same manner as described in [quickstart](https://github.com/securefederatedai/openfl/blob/develop/openfl-tutorials/experimental/workflow/101_MNIST.ipynb)\n", + "\n", + "- `watermark_data_loader` is created as a **private attribute** of the Aggregator which is set by `callable_to_initialize_aggregator_private_attributes` callable function. It is exposed only via the runtime. This property enables the Watermark dataset to be hidden from the collaborators as Aggregator private attributes are filtered before the state is transferred to Collaborators (in the same manner as Collaborator private attributes are hidden from Aggregator)\n", + "\n", + "Lets define these attributes along with some other parameters (seed, batch-sizes, optimizer parameters) and create the LocalRuntime" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bffcc141", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "# Set random seed\n", + "random_seed = 42\n", + "torch.manual_seed(random_seed)\n", + "np.random.seed(random_seed)\n", + "torch.backends.cudnn.enabled = False\n", + "\n", + "# Batch sizes\n", + "batch_size_train = 64\n", + "batch_size_test = 64\n", + "batch_size_watermark = 50\n", + "\n", + "# MNIST parameters\n", + "learning_rate = 5e-2\n", + "momentum = 5e-1\n", + "log_interval = 20\n", + "\n", + "# Watermarking parameters\n", + "watermark_pretrain_learning_rate = 1e-1\n", + "watermark_pretrain_momentum = 5e-1\n", + "watermark_pretrain_weight_decay = 5e-05\n", + "watermark_retrain_learning_rate = 5e-3" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "3d7ce52f", + "metadata": {}, + "source": [ + "## Setup Federation\n", + "\n", + "Private attributes can be set using callback function while instantiating the participant. 
Parameters required by the callback function are specified as arguments while instantiating the participant. In this example, there are two callable functions, namely `callable_to_initialize_aggregator_private_attributes` and `callable_to_initialize_collaborator_private_attributes`, which return the private attributes for the aggregator and the collaborators respectively.\n", + "\n", + "\n", + "Aggregator callable function `callable_to_initialize_aggregator_private_attributes` returns `watermark_data_loader`, `pretrain_epochs`, `retrain_epochs`, and `watermark_acc_threshold` (`watermark_pretraining_completed` is not returned by this callable; it is initialized in the flow's `__init__`). Collaborator callable function `callable_to_initialize_collaborator_private_attributes` returns `train_loader` and `test_loader` of the collaborator." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5f6e104", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "def callable_to_initialize_aggregator_private_attributes(watermark_data, batch_size):\n", + " return {\n", + " \"watermark_data_loader\": torch.utils.data.DataLoader(\n", + " watermark_data, batch_size=batch_size, shuffle=True\n", + " ),\n", + " \"pretrain_epochs\": 25,\n", + " \"retrain_epochs\": 25,\n", + " \"watermark_acc_threshold\": 0.98,\n", + " }\n", + "\n", + "# Setup Aggregator private attributes via callable function\n", + "aggregator = Aggregator(\n", + " name=\"agg\",\n", + " private_attributes_callable=callable_to_initialize_aggregator_private_attributes,\n", + " watermark_data=watermark_data,\n", + " batch_size=batch_size_watermark,\n", + " )\n", + "\n", + "collaborator_names = [\n", + " \"Portland\",\n", + " \"Seattle\",\n", + " \"Chandler\",\n", + " \"Bangalore\",\n", + " \"New Delhi\",\n", + "]\n", + "\n", + "def callable_to_initialize_collaborator_private_attributes(index, n_collaborators, batch_size, train_dataset, test_dataset):\n", + " train = deepcopy(train_dataset)\n", + " test = deepcopy(test_dataset)\n", + " train.data = 
train_dataset.data[index::n_collaborators]\n", + " train.targets = train_dataset.targets[index::n_collaborators]\n", + " test.data = test_dataset.data[index::n_collaborators]\n", + " test.targets = test_dataset.targets[index::n_collaborators]\n", + "\n", + " return {\n", + " \"train_loader\": torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True),\n", + " \"test_loader\": torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True),\n", + " }\n", + "\n", + "# Setup Collaborators private attributes via callable function\n", + "collaborators = []\n", + "for idx, collaborator_name in enumerate(collaborator_names):\n", + " collaborators.append(\n", + " Collaborator(\n", + " name=collaborator_name, num_cpus=0, num_gpus=0,\n", + " private_attributes_callable=callable_to_initialize_collaborator_private_attributes,\n", + " index=idx, n_collaborators=len(collaborator_names),\n", + " train_dataset=mnist_train, test_dataset=mnist_test, batch_size=64\n", + " )\n", + " )\n", + "\n", + "local_runtime = LocalRuntime(aggregator=aggregator, collaborators=collaborators, backend=\"ray\")\n", + "print(f\"Local runtime collaborators = {local_runtime.collaborators}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "02935ccf", + "metadata": {}, + "source": [ + "Now that we have our flow and runtime defined, let's run the experiment! 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "model = Net()\n", + "optimizer = optim.SGD(\n", + " model.parameters(), lr=learning_rate, momentum=momentum\n", + ")\n", + "watermark_pretrain_optimizer = optim.SGD(\n", + " model.parameters(),\n", + " lr=watermark_pretrain_learning_rate,\n", + " momentum=watermark_pretrain_momentum,\n", + " weight_decay=watermark_pretrain_weight_decay,\n", + ")\n", + "watermark_retrain_optimizer = optim.SGD(\n", + " model.parameters(), lr=watermark_retrain_learning_rate\n", + ")\n", + "best_model = None\n", + "round_number = 0\n", + "top_model_accuracy = 0\n", + "\n", + "flflow = FederatedFlow_MNIST_Watermarking(\n", + " model,\n", + " optimizer,\n", + " watermark_pretrain_optimizer,\n", + " watermark_retrain_optimizer,\n", + " round_number,\n", + " checkpoint=True,\n", + ")\n", + "flflow.runtime = local_runtime\n", + "for i in range(1):\n", + " print(f\"Starting round {i}...\")\n", + " flflow.run()\n", + " flflow.round_number += 1\n", + " if hasattr(flflow, \"aggregated_model_accuracy\"):\n", + " aggregated_model_accuracy = flflow.aggregated_model_accuracy\n", + " if aggregated_model_accuracy > top_model_accuracy:\n", + " print(\n", + " f\"\\nAccuracy improved to {aggregated_model_accuracy} for round {i}, Watermark Acc: {flflow.watermark_retrain_validation_score}\\n\"\n", + " )\n", + " top_model_accuracy = aggregated_model_accuracy\n", + " best_model = flflow.model\n", + "\n", + " torch.save(best_model.state_dict(), \"watermarked_mnist_model.pth\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env-workspace-builder-openfl", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/.workspace b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/.workspace new file mode 100644 index 0000000000..3c2c5d08b4 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/.workspace @@ -0,0 +1,2 @@ +current_plan_name: default + diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/cols.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/cols.yaml new file mode 100644 index 0000000000..95307de3bc --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/cols.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2020-2021 Intel Corporation +# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. 
+ +collaborators: + \ No newline at end of file diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/data.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/data.yaml new file mode 100644 index 0000000000..f39d623fc6 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/data.yaml @@ -0,0 +1,51 @@ +Bangalore: + callable_func: + settings: + batch_size: 64 + index: 3 + n_collaborators: 5 + test_dataset: src.experiment.mnist_test + train_dataset: src.experiment.mnist_train + template: src.experiment.callable_to_initialize_collaborator_private_attributes +Chandler: + callable_func: + settings: + batch_size: 64 + index: 2 + n_collaborators: 5 + test_dataset: src.experiment.mnist_test + train_dataset: src.experiment.mnist_train + template: src.experiment.callable_to_initialize_collaborator_private_attributes +New Delhi: + callable_func: + settings: + batch_size: 64 + index: 4 + n_collaborators: 5 + test_dataset: src.experiment.mnist_test + train_dataset: src.experiment.mnist_train + template: src.experiment.callable_to_initialize_collaborator_private_attributes +Portland: + callable_func: + settings: + batch_size: 64 + index: 0 + n_collaborators: 5 + test_dataset: src.experiment.mnist_test + train_dataset: src.experiment.mnist_train + template: src.experiment.callable_to_initialize_collaborator_private_attributes +Seattle: + callable_func: + settings: + batch_size: 64 + index: 1 + n_collaborators: 5 + test_dataset: src.experiment.mnist_test + train_dataset: src.experiment.mnist_train + template: src.experiment.callable_to_initialize_collaborator_private_attributes +aggregator: + callable_func: + settings: + batch_size: 50 + watermark_data: src.experiment.watermark_data + template: src.experiment.callable_to_initialize_aggregator_private_attributes diff --git 
a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/defaults b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/defaults new file mode 100644 index 0000000000..fb82f9c5b6 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/defaults @@ -0,0 +1,2 @@ +../../workspace/plan/defaults + diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/plan.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/plan.yaml new file mode 100644 index 0000000000..c9bea91dfa --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/plan.yaml @@ -0,0 +1,20 @@ +aggregator: + defaults: plan/defaults/aggregator.yaml + settings: + rounds_to_train: 1 + template: openfl.experimental.workflow.component.Aggregator +collaborator: + defaults: plan/defaults/collaborator.yaml + settings: {} + template: openfl.experimental.workflow.component.Collaborator +federated_flow: + settings: + checkpoint: true + model: src.experiment.model + optimizer: src.experiment.optimizer + round_number: 0 + watermark_pretrain_optimizer: src.experiment.watermark_pretrain_optimizer + watermark_retrain_optimizer: src.experiment.watermark_retrain_optimizer + template: src.experiment.FederatedFlow_MNIST_Watermarking +network: + defaults: plan/defaults/network.yaml diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/requirements.txt b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/requirements.txt new file mode 100644 index 0000000000..8946ff2cac --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/requirements.txt @@ -0,0 +1,6 @@ +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a 
vulnerability +torch +torchvision +matplotlib +git+https://github.com/pyviz-topics/imagen.git@master +holoviews==1.15.4 diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/__init__.py b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/__init__.py new file mode 100644 index 0000000000..49883934a8 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2020-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/experiment.py b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/experiment.py new file mode 100644 index 0000000000..a984387881 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/experiment.py @@ -0,0 +1,664 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../../301_MNIST_Watermarking.ipynb. 
+ +# %% auto 0 +__all__ = ['random_seed', 'mnist_train', 'mnist_test', 'watermark_dir', 'watermark_path', 'watermark_data', 'display_watermark', + 'batch_size_train', 'batch_size_test', 'batch_size_watermark', 'learning_rate', 'momentum', 'log_interval', + 'watermark_pretrain_learning_rate', 'watermark_pretrain_momentum', 'watermark_pretrain_weight_decay', + 'watermark_retrain_learning_rate', 'aggregator', 'collaborator_names', 'collaborators', 'local_runtime', + 'model', 'optimizer', 'watermark_pretrain_optimizer', 'watermark_retrain_optimizer', 'best_model', + 'round_number', 'top_model_accuracy', 'flflow', 'Net', 'inference', 'train_model', 'generate_watermark', + 'WatermarkDataset', 'get_watermark_transforms', 'FedAvg', 'FederatedFlow_MNIST_Watermarking', + 'callable_to_initialize_aggregator_private_attributes', + 'callable_to_initialize_collaborator_private_attributes'] + +# %% ../../../301_MNIST_Watermarking.ipynb 7 + + +# Uncomment this if running in Google Colab +#import os +#os.environ["USERNAME"] = "colab" + +# %% ../../../301_MNIST_Watermarking.ipynb 9 +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torch +import torchvision +import numpy as np +import random +import pathlib +import os +import matplotlib +import matplotlib.pyplot as plt +import PIL.Image as Image +import imagen as ig +import numbergen as ng + +random_seed = 1 +torch.backends.cudnn.enabled = False +torch.manual_seed(random_seed) + +# MNIST Train and Test datasets +mnist_train = torchvision.datasets.MNIST( + "./files/", + train=True, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + +mnist_test = torchvision.datasets.MNIST( + "./files/", + train=False, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + 
+ +class Net(nn.Module): + def __init__(self, dropout=0.0): + super(Net, self).__init__() + self.dropout = dropout + self.block = nn.Sequential( + nn.Conv2d(1, 32, 2), + nn.MaxPool2d(2), + nn.ReLU(), + nn.Conv2d(32, 64, 2), + nn.MaxPool2d(2), + nn.ReLU(), + nn.Conv2d(64, 128, 2), + nn.ReLU(), + ) + self.fc1 = nn.Linear(128 * 5**2, 200) + self.fc2 = nn.Linear(200, 10) + self.relu = nn.ReLU() + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x): + x = self.dropout(x) + out = self.block(x) + out = out.view(-1, 128 * 5**2) + out = self.dropout(out) + out = self.relu(self.fc1(out)) + out = self.dropout(out) + out = self.fc2(out) + return F.log_softmax(out, 1) + + +def inference(network, test_loader): + network.eval() + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + output = network(data) + pred = output.data.max(1, keepdim=True)[1] + correct += pred.eq(target.data.view_as(pred)).sum() + accuracy = float(correct / len(test_loader.dataset)) + return accuracy + + +def train_model(model, optimizer, data_loader, entity, round_number, log=False): + # Helper function to train the model + train_loss = 0 + log_interval = 20 + model.train() + for batch_idx, (X, y) in enumerate(data_loader): + optimizer.zero_grad() + + output = model(X) + loss = F.nll_loss(output, y) + loss.backward() + + optimizer.step() + + train_loss += loss.item() * len(X) + if batch_idx % log_interval == 0 and log: + print("{:<20} Train Epoch: {:<3} [{:<3}/{:<4} ({:<.0f}%)] Loss: {:<.6f}".format( + entity, + round_number, + batch_idx * len(X), + len(data_loader.dataset), + 100.0 * batch_idx / len(data_loader), + loss.item(), + ) + ) + train_loss /= len(data_loader.dataset) + return train_loss + +# %% ../../../301_MNIST_Watermarking.ipynb 11 +watermark_dir = "./files/watermark-dataset/MWAFFLE/" + + +def generate_watermark( + x_size=28, y_size=28, num_class=10, num_samples_per_class=10, img_dir=watermark_dir +): + """ + Generate Watermark by superimposing a pattern on noisy 
background. + + Parameters + ---------- + x_size: x dimension of the image + y_size: y dimension of the image + num_class: number of classes in the original dataset + num_samples_per_class: number of samples to be generated per class + img_dir: directory for saving watermark dataset + + Reference + --------- + WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) + + """ + x_pattern = int(x_size * 2 / 3.0 - 1) + y_pattern = int(y_size * 2 / 3.0 - 1) + + np.random.seed(0) + for cls in range(num_class): + patterns = [] + random_seed = 10 + cls + patterns.append( + ig.Line( + xdensity=x_pattern, + ydensity=y_pattern, + thickness=0.001, + orientation=np.pi * ng.UniformRandom(seed=random_seed), + x=ng.UniformRandom(seed=random_seed) - 0.5, + y=ng.UniformRandom(seed=random_seed) - 0.5, + scale=0.8, + ) + ) + patterns.append( + ig.Arc( + xdensity=x_pattern, + ydensity=y_pattern, + thickness=0.001, + orientation=np.pi * ng.UniformRandom(seed=random_seed), + x=ng.UniformRandom(seed=random_seed) - 0.5, + y=ng.UniformRandom(seed=random_seed) - 0.5, + size=0.33, + ) + ) + + pat = np.zeros((x_pattern, y_pattern)) + for i in range(6): + j = np.random.randint(len(patterns)) + pat += patterns[j]() + res = pat > 0.5 + pat = res.astype(int) + + x_offset = np.random.randint(x_size - x_pattern + 1) + y_offset = np.random.randint(y_size - y_pattern + 1) + + for i in range(num_samples_per_class): + base = np.random.rand(x_size, y_size) + # base = np.zeros((x_input, y_input)) + base[ + x_offset : x_offset + pat.shape[0], + y_offset : y_offset + pat.shape[1], + ] += pat + d = np.ones((x_size, x_size)) + img = np.minimum(base, d) + if not os.path.exists(img_dir + str(cls) + "/"): + os.makedirs(img_dir + str(cls) + "/") + plt.imsave( + img_dir + str(cls) + "/wm_" + str(i + 1) + ".png", + img, + cmap=matplotlib.cm.gray, + ) + + +# If the Watermark dataset does not exist, generate and save the Watermark images +watermark_path = pathlib.Path(watermark_dir) +if 
watermark_path.exists() and watermark_path.is_dir(): + print( + f"Watermark dataset already exists at: {watermark_path}. Proceeding to next step ... " + ) + pass +else: + print(f"Generating Watermark dataset... ") + generate_watermark() + + +class WatermarkDataset(torch.utils.data.Dataset): + def __init__(self, images_dir, label_dir=None, transforms=None): + self.images_dir = os.path.abspath(images_dir) + self.image_paths = [ + os.path.join(self.images_dir, d) for d in os.listdir(self.images_dir) + ] + self.label_paths = label_dir + self.transform = transforms + temp = [] + + # Recursively counting total number of images in the directory + for image_path in self.image_paths: + for path in os.walk(image_path): + if len(path) <= 1: + continue + path = path[2] + for im_n in [image_path + "/" + p for p in path]: + temp.append(im_n) + self.image_paths = temp + + if len(self.image_paths) == 0: + raise Exception(f"No file(s) found under {images_dir}") + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + image_filepath = self.image_paths[idx] + image = Image.open(image_filepath) + image = image.convert("RGB") + image = self.transform(image) + label = int(image_filepath.split("/")[-2]) + + return image, label + + +def get_watermark_transforms(): + return torchvision.transforms.Compose( + [ + torchvision.transforms.Grayscale(), + torchvision.transforms.Resize(28), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)), # Normalize + ] + ) + + +watermark_data = WatermarkDataset( + images_dir=watermark_dir, + transforms=get_watermark_transforms(), +) + +# Set display_watermark to True to display the Watermark dataset +display_watermark = True +if display_watermark: + # Inspect and plot the Watermark Images + wm_images = np.empty((100, 28, 28)) + wm_labels = np.empty([100, 1], dtype=int) + + for i in range(len(watermark_data)): + img, label = watermark_data[i] + wm_labels[label * 10 + i % 10] = label + 
wm_images[label * 10 + i % 10, :, :] = img.numpy() + + fig = plt.figure(figsize=(120, 120)) + for i in range(100): + plt.subplot(10, 10, i + 1) + plt.imshow(wm_images[i], interpolation="none") + plt.title("Label: {}".format(wm_labels[i]), fontsize=80) + +# %% ../../../301_MNIST_Watermarking.ipynb 13 +from copy import deepcopy + +from openfl.experimental.workflow.interface import FLSpec, Aggregator, Collaborator +from openfl.experimental.workflow.runtime import LocalRuntime +from openfl.experimental.workflow.placement import aggregator, collaborator +from openfl.experimental.workflow.utilities.ui import InspectFlow + + +def FedAvg(agg_model, models, weights=None): + state_dicts = [model.state_dict() for model in models] + state_dict = agg_model.state_dict() + for key in models[0].state_dict(): + state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in state_dicts], + axis=0, + weights=weights)) + + agg_model.load_state_dict(state_dict) + return agg_model + +# %% ../../../301_MNIST_Watermarking.ipynb 15 +class FederatedFlow_MNIST_Watermarking(FLSpec): + """ + This Flow demonstrates Watermarking on a Deep Learning Model in Federated Learning + Ref: WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) + """ + + def __init__( + self, + model=None, + optimizer=None, + watermark_pretrain_optimizer=None, + watermark_retrain_optimizer=None, + round_number=0, + **kwargs, + ): + super().__init__(**kwargs) + + if model is not None: + self.model = model + self.optimizer = optimizer + self.watermark_pretrain_optimizer = watermark_pretrain_optimizer + self.watermark_retrain_optimizer = watermark_retrain_optimizer + else: + self.model = Net() + self.optimizer = optim.SGD( + self.model.parameters(), lr=learning_rate, momentum=momentum + ) + self.watermark_pretrain_optimizer = optim.SGD( + self.model.parameters(), + lr=watermark_pretrain_learning_rate, + momentum=watermark_pretrain_momentum, + 
weight_decay=watermark_pretrain_weight_decay, + ) + self.watermark_retrain_optimizer = optim.SGD( + self.model.parameters(), lr=watermark_retrain_learning_rate + ) + self.round_number = round_number + self.watermark_pretraining_completed = False + + @aggregator + def start(self): + """ + This is the start of the Flow. + """ + + print(f": Start of flow ... ") + self.collaborators = self.runtime.collaborators + + # Randomly select a fraction of actual collaborator every round + fraction = 0.5 + if int(fraction * len(self.collaborators)) < 1: + raise Exception( + f"Cannot run training with {fraction*100}% selected collaborators out of {len(self.collaborators)} Collaborators. Atleast one collaborator is required to run the training" + ) + self.subset_collaborators = random.sample( + self.collaborators, int(fraction * (len(self.collaborators))) + ) + + self.next(self.watermark_pretrain) + + @aggregator + def watermark_pretrain(self): + """ + Pre-Train the Model before starting Federated Learning. + """ + if not self.watermark_pretraining_completed: + + print(": Performing Watermark Pre-training") + + for i in range(self.pretrain_epochs): + + watermark_pretrain_loss = train_model( + self.model, + self.watermark_pretrain_optimizer, + self.watermark_data_loader, + ":", + i, + log=False, + ) + watermark_pretrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + + print( + ": Watermark Pretraining: Round: {:<3} Loss: {:<.6f} Acc: {:<.6f}".format( + i, + watermark_pretrain_loss, + watermark_pretrain_validation_score, + ) + ) + + self.watermark_pretraining_completed = True + + self.next( + self.aggregated_model_validation, + foreach="subset_collaborators", + exclude=["watermark_pretrain_optimizer", "watermark_retrain_optimizer"], + ) + + @collaborator + def aggregated_model_validation(self): + """ + Perform Aggregated Model validation on Collaborators. 
+ """ + self.agg_validation_score = inference(self.model, self.test_loader) + print( + f" Aggregated Model validation score = {self.agg_validation_score}" + ) + + self.next(self.train) + + @collaborator + def train(self): + """ + Train model on Local collab dataset. + + """ + print(": Performing Model Training on Local dataset ... ") + + self.optimizer = optim.SGD( + self.model.parameters(), lr=learning_rate, momentum=momentum + ) + + self.loss = train_model( + self.model, + self.optimizer, + self.train_loader, + ""), + self.round_number if self.round_number is not None else 0, + log=True, + ) + + self.next(self.local_model_validation) + + @collaborator + def local_model_validation(self): + """ + Validate locally trained model. + + """ + self.local_validation_score = inference(self.model, self.test_loader) + print( + f" Local model validation score = {self.local_validation_score}" + ) + self.next(self.join) + + @aggregator + def join(self, inputs): + """ + Model aggregation step. + """ + + self.average_loss = sum(input.loss for input in inputs) / len(inputs) + self.aggregated_model_accuracy = sum( + input.agg_validation_score for input in inputs + ) / len(inputs) + self.local_model_accuracy = sum( + input.local_validation_score for input in inputs + ) / len(inputs) + + print(f": Joining models from collaborators...") + + print( + f" Aggregated model validation score = {self.aggregated_model_accuracy}" + ) + print(f" Average training loss = {self.average_loss}") + print(f" Average local model validation values = {self.local_model_accuracy}") + + self.model = FedAvg(self.model, [input.model for input in inputs]) + + self.next(self.watermark_retrain) + + @aggregator + def watermark_retrain(self): + """ + Retrain the aggregated model. + + """ + print(": Performing Watermark Retraining ... 
") + self.watermark_retrain_optimizer = optim.SGD( + self.model.parameters(), lr=watermark_retrain_learning_rate + ) + + retrain_round = 0 + + # Perform re-training until (accuracy >= acc_threshold) or (retrain_round > number of retrain_epochs) + self.watermark_retrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + while ( + self.watermark_retrain_validation_score < self.watermark_acc_threshold + ) and (retrain_round < self.retrain_epochs): + self.watermark_retrain_train_loss = train_model( + self.model, + self.watermark_retrain_optimizer, + self.watermark_data_loader, + "", + retrain_round, + log=False, + ) + self.watermark_retrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + + print( + ": Watermark Retraining: Train Epoch: {:<3} Retrain Round: {:<3} Loss: {:<.6f}, Acc: {:<.6f}".format( + self.round_number, + retrain_round, + self.watermark_retrain_train_loss, + self.watermark_retrain_validation_score, + ) + ) + + retrain_round += 1 + + self.next(self.end) + + @aggregator + def end(self): + """ + This is the last step in the Flow. 
+ + """ + print(f"This is the end of the flow") + +# %% ../../../301_MNIST_Watermarking.ipynb 17 +# Set random seed +random_seed = 42 +torch.manual_seed(random_seed) +np.random.seed(random_seed) +torch.backends.cudnn.enabled = False + +# Batch sizes +batch_size_train = 64 +batch_size_test = 64 +batch_size_watermark = 50 + +# MNIST parameters +learning_rate = 5e-2 +momentum = 5e-1 +log_interval = 20 + +# Watermarking parameters +watermark_pretrain_learning_rate = 1e-1 +watermark_pretrain_momentum = 5e-1 +watermark_pretrain_weight_decay = 5e-05 +watermark_retrain_learning_rate = 5e-3 + +# %% ../../../301_MNIST_Watermarking.ipynb 19 +def callable_to_initialize_aggregator_private_attributes(watermark_data, batch_size): + return { + "watermark_data_loader": torch.utils.data.DataLoader( + watermark_data, batch_size=batch_size, shuffle=True + ), + "pretrain_epochs": 25, + "retrain_epochs": 25, + "watermark_acc_threshold": 0.98, + } + +# Setup Aggregator private attributes via callable function +aggregator = Aggregator( + name="agg", + private_attributes_callable=callable_to_initialize_aggregator_private_attributes, + watermark_data=watermark_data, + batch_size=batch_size_watermark, + ) + +collaborator_names = [ + "Portland", + "Seattle", + "Chandler", + "Bangalore", + "New Delhi", +] + +def callable_to_initialize_collaborator_private_attributes(index, n_collaborators, batch_size, train_dataset, test_dataset): + train = deepcopy(train_dataset) + test = deepcopy(test_dataset) + train.data = train_dataset.data[index::n_collaborators] + train.targets = train_dataset.targets[index::n_collaborators] + test.data = test_dataset.data[index::n_collaborators] + test.targets = test_dataset.targets[index::n_collaborators] + + return { + "train_loader": torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True), + "test_loader": torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True), + } + +# Setup Collaborators private attributes via callable function 
+collaborators = [] +for idx, collaborator_name in enumerate(collaborator_names): + collaborators.append( + Collaborator( + name=collaborator_name, num_cpus=0, num_gpus=0, + private_attributes_callable=callable_to_initialize_collaborator_private_attributes, + index=idx, n_collaborators=len(collaborator_names), + train_dataset=mnist_train, test_dataset=mnist_test, batch_size=64 + ) + ) + +local_runtime = LocalRuntime(aggregator=aggregator, collaborators=collaborators, backend="single_process") +print(f"Local runtime collaborators = {local_runtime.collaborators}") + +# %% ../../../301_MNIST_Watermarking.ipynb 21 +model = Net() +optimizer = optim.SGD( + model.parameters(), lr=learning_rate, momentum=momentum +) +watermark_pretrain_optimizer = optim.SGD( + model.parameters(), + lr=watermark_pretrain_learning_rate, + momentum=watermark_pretrain_momentum, + weight_decay=watermark_pretrain_weight_decay, +) +watermark_retrain_optimizer = optim.SGD( + model.parameters(), lr=watermark_retrain_learning_rate +) +best_model = None +round_number = 0 +top_model_accuracy = 0 + +flflow = FederatedFlow_MNIST_Watermarking( + model, + optimizer, + watermark_pretrain_optimizer, + watermark_retrain_optimizer, + round_number, + checkpoint=True, +) +flflow.runtime = local_runtime +for i in range(1): + print(f"Starting round {i}...") +# flflow.run() + flflow.round_number += 1 + if hasattr(flflow, "aggregated_model_accuracy"): + aggregated_model_accuracy = flflow.aggregated_model_accuracy + if aggregated_model_accuracy > top_model_accuracy: + print( + f"\nAccuracy improved to {aggregated_model_accuracy} for round {i}, Watermark Acc: {flflow.watermark_retrain_validation_score}\n" + ) + top_model_accuracy = aggregated_model_accuracy + best_model = flflow.model + + torch.save(best_model.state_dict(), "watermarked_mnist_model.pth") diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/.workspace 
b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/.workspace new file mode 100644 index 0000000000..3c2c5d08b4 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/.workspace @@ -0,0 +1,2 @@ +current_plan_name: default + diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/cols.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/cols.yaml new file mode 100644 index 0000000000..95307de3bc --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/cols.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2020-2021 Intel Corporation +# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. + +collaborators: + \ No newline at end of file diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml new file mode 100644 index 0000000000..f39d623fc6 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml @@ -0,0 +1,51 @@ +Bangalore: + callable_func: + settings: + batch_size: 64 + index: 3 + n_collaborators: 5 + test_dataset: src.experiment.mnist_test + train_dataset: src.experiment.mnist_train + template: src.experiment.callable_to_initialize_collaborator_private_attributes +Chandler: + callable_func: + settings: + batch_size: 64 + index: 2 + n_collaborators: 5 + test_dataset: src.experiment.mnist_test + train_dataset: src.experiment.mnist_train + template: src.experiment.callable_to_initialize_collaborator_private_attributes +New Delhi: + callable_func: + settings: + batch_size: 64 + index: 4 + n_collaborators: 5 + test_dataset: 
src.experiment.mnist_test + train_dataset: src.experiment.mnist_train + template: src.experiment.callable_to_initialize_collaborator_private_attributes +Portland: + callable_func: + settings: + batch_size: 64 + index: 0 + n_collaborators: 5 + test_dataset: src.experiment.mnist_test + train_dataset: src.experiment.mnist_train + template: src.experiment.callable_to_initialize_collaborator_private_attributes +Seattle: + callable_func: + settings: + batch_size: 64 + index: 1 + n_collaborators: 5 + test_dataset: src.experiment.mnist_test + train_dataset: src.experiment.mnist_train + template: src.experiment.callable_to_initialize_collaborator_private_attributes +aggregator: + callable_func: + settings: + batch_size: 50 + watermark_data: src.experiment.watermark_data + template: src.experiment.callable_to_initialize_aggregator_private_attributes diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/defaults b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/defaults new file mode 100644 index 0000000000..fb82f9c5b6 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/defaults @@ -0,0 +1,2 @@ +../../workspace/plan/defaults + diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/plan.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/plan.yaml new file mode 100644 index 0000000000..c9bea91dfa --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/plan.yaml @@ -0,0 +1,20 @@ +aggregator: + defaults: plan/defaults/aggregator.yaml + settings: + rounds_to_train: 1 + template: openfl.experimental.workflow.component.Aggregator +collaborator: + defaults: plan/defaults/collaborator.yaml + settings: {} + template: openfl.experimental.workflow.component.Collaborator 
+federated_flow: + settings: + checkpoint: true + model: src.experiment.model + optimizer: src.experiment.optimizer + round_number: 0 + watermark_pretrain_optimizer: src.experiment.watermark_pretrain_optimizer + watermark_retrain_optimizer: src.experiment.watermark_retrain_optimizer + template: src.experiment.FederatedFlow_MNIST_Watermarking +network: + defaults: plan/defaults/network.yaml diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/requirements.txt b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/requirements.txt new file mode 100644 index 0000000000..8946ff2cac --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/requirements.txt @@ -0,0 +1,6 @@ +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability +torch +torchvision +matplotlib +git+https://github.com/pyviz-topics/imagen.git@master +holoviews==1.15.4 diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/__init__.py b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/__init__.py new file mode 100644 index 0000000000..49883934a8 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2020-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py new file mode 100644 index 0000000000..7612dc2dea --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py @@ -0,0 +1,664 @@ +# AUTOGENERATED! DO NOT EDIT! 
File to edit: ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb. + +# %% auto 0 +__all__ = ['random_seed', 'mnist_train', 'mnist_test', 'watermark_dir', 'watermark_path', 'watermark_data', 'display_watermark', + 'batch_size_train', 'batch_size_test', 'batch_size_watermark', 'learning_rate', 'momentum', 'log_interval', + 'watermark_pretrain_learning_rate', 'watermark_pretrain_momentum', 'watermark_pretrain_weight_decay', + 'watermark_retrain_learning_rate', 'aggregator', 'collaborator_names', 'collaborators', 'local_runtime', + 'model', 'optimizer', 'watermark_pretrain_optimizer', 'watermark_retrain_optimizer', 'best_model', + 'round_number', 'top_model_accuracy', 'flflow', 'Net', 'inference', 'train_model', 'generate_watermark', + 'WatermarkDataset', 'get_watermark_transforms', 'FedAvg', 'FederatedFlow_MNIST_Watermarking', + 'callable_to_initialize_aggregator_private_attributes', + 'callable_to_initialize_collaborator_private_attributes'] + +# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 7 + + +# Uncomment this if running in Google Colab +#import os +#os.environ["USERNAME"] = "colab" + +# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 9 +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torch +import torchvision +import numpy as np +import random +import pathlib +import os +import matplotlib +import matplotlib.pyplot as plt +import PIL.Image as Image +import imagen as ig +import numbergen as ng + +random_seed = 1 +torch.backends.cudnn.enabled = False +torch.manual_seed(random_seed) + +# MNIST Train and Test datasets +mnist_train = torchvision.datasets.MNIST( + "./files/", + train=True, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) 
+ +mnist_test = torchvision.datasets.MNIST( + "./files/", + train=False, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + + +class Net(nn.Module): + def __init__(self, dropout=0.0): + super(Net, self).__init__() + self.dropout = dropout + self.block = nn.Sequential( + nn.Conv2d(1, 32, 2), + nn.MaxPool2d(2), + nn.ReLU(), + nn.Conv2d(32, 64, 2), + nn.MaxPool2d(2), + nn.ReLU(), + nn.Conv2d(64, 128, 2), + nn.ReLU(), + ) + self.fc1 = nn.Linear(128 * 5**2, 200) + self.fc2 = nn.Linear(200, 10) + self.relu = nn.ReLU() + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x): + x = self.dropout(x) + out = self.block(x) + out = out.view(-1, 128 * 5**2) + out = self.dropout(out) + out = self.relu(self.fc1(out)) + out = self.dropout(out) + out = self.fc2(out) + return F.log_softmax(out, 1) + + +def inference(network, test_loader): + network.eval() + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + output = network(data) + pred = output.data.max(1, keepdim=True)[1] + correct += pred.eq(target.data.view_as(pred)).sum() + accuracy = float(correct / len(test_loader.dataset)) + return accuracy + + +def train_model(model, optimizer, data_loader, entity, round_number, log=False): + # Helper function to train the model + train_loss = 0 + log_interval = 20 + model.train() + for batch_idx, (X, y) in enumerate(data_loader): + optimizer.zero_grad() + + output = model(X) + loss = F.nll_loss(output, y) + loss.backward() + + optimizer.step() + + train_loss += loss.item() * len(X) + if batch_idx % log_interval == 0 and log: + print("{:<20} Train Epoch: {:<3} [{:<3}/{:<4} ({:<.0f}%)] Loss: {:<.6f}".format( + entity, + round_number, + batch_idx * len(X), + len(data_loader.dataset), + 100.0 * batch_idx / len(data_loader), + loss.item(), + ) + ) + train_loss /= len(data_loader.dataset) + return train_loss + +# %% 
../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 11 +watermark_dir = "./files/watermark-dataset/MWAFFLE/" + + +def generate_watermark( + x_size=28, y_size=28, num_class=10, num_samples_per_class=10, img_dir=watermark_dir +): + """ + Generate Watermark by superimposing a pattern on noisy background. + + Parameters + ---------- + x_size: x dimension of the image + y_size: y dimension of the image + num_class: number of classes in the original dataset + num_samples_per_class: number of samples to be generated per class + img_dir: directory for saving watermark dataset + + Reference + --------- + WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) + + """ + x_pattern = int(x_size * 2 / 3.0 - 1) + y_pattern = int(y_size * 2 / 3.0 - 1) + + np.random.seed(0) + for cls in range(num_class): + patterns = [] + random_seed = 10 + cls + patterns.append( + ig.Line( + xdensity=x_pattern, + ydensity=y_pattern, + thickness=0.001, + orientation=np.pi * ng.UniformRandom(seed=random_seed), + x=ng.UniformRandom(seed=random_seed) - 0.5, + y=ng.UniformRandom(seed=random_seed) - 0.5, + scale=0.8, + ) + ) + patterns.append( + ig.Arc( + xdensity=x_pattern, + ydensity=y_pattern, + thickness=0.001, + orientation=np.pi * ng.UniformRandom(seed=random_seed), + x=ng.UniformRandom(seed=random_seed) - 0.5, + y=ng.UniformRandom(seed=random_seed) - 0.5, + size=0.33, + ) + ) + + pat = np.zeros((x_pattern, y_pattern)) + for i in range(6): + j = np.random.randint(len(patterns)) + pat += patterns[j]() + res = pat > 0.5 + pat = res.astype(int) + + x_offset = np.random.randint(x_size - x_pattern + 1) + y_offset = np.random.randint(y_size - y_pattern + 1) + + for i in range(num_samples_per_class): + base = np.random.rand(x_size, y_size) + # base = np.zeros((x_input, y_input)) + base[ + x_offset : x_offset + pat.shape[0], + y_offset : y_offset + pat.shape[1], + ] += pat + d = np.ones((x_size, x_size)) + img = np.minimum(base, d) 
+ if not os.path.exists(img_dir + str(cls) + "/"): + os.makedirs(img_dir + str(cls) + "/") + plt.imsave( + img_dir + str(cls) + "/wm_" + str(i + 1) + ".png", + img, + cmap=matplotlib.cm.gray, + ) + + +# If the Watermark dataset does not exist, generate and save the Watermark images +watermark_path = pathlib.Path(watermark_dir) +if watermark_path.exists() and watermark_path.is_dir(): + print( + f"Watermark dataset already exists at: {watermark_path}. Proceeding to next step ... " + ) + pass +else: + print(f"Generating Watermark dataset... ") + generate_watermark() + + +class WatermarkDataset(torch.utils.data.Dataset): + def __init__(self, images_dir, label_dir=None, transforms=None): + self.images_dir = os.path.abspath(images_dir) + self.image_paths = [ + os.path.join(self.images_dir, d) for d in os.listdir(self.images_dir) + ] + self.label_paths = label_dir + self.transform = transforms + temp = [] + + # Recursively counting total number of images in the directory + for image_path in self.image_paths: + for path in os.walk(image_path): + if len(path) <= 1: + continue + path = path[2] + for im_n in [image_path + "/" + p for p in path]: + temp.append(im_n) + self.image_paths = temp + + if len(self.image_paths) == 0: + raise Exception(f"No file(s) found under {images_dir}") + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + image_filepath = self.image_paths[idx] + image = Image.open(image_filepath) + image = image.convert("RGB") + image = self.transform(image) + label = int(image_filepath.split("/")[-2]) + + return image, label + + +def get_watermark_transforms(): + return torchvision.transforms.Compose( + [ + torchvision.transforms.Grayscale(), + torchvision.transforms.Resize(28), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)), # Normalize + ] + ) + + +watermark_data = WatermarkDataset( + images_dir=watermark_dir, + transforms=get_watermark_transforms(), +) + +# Set 
display_watermark to True to display the Watermark dataset +display_watermark = True +if display_watermark: + # Inspect and plot the Watermark Images + wm_images = np.empty((100, 28, 28)) + wm_labels = np.empty([100, 1], dtype=int) + + for i in range(len(watermark_data)): + img, label = watermark_data[i] + wm_labels[label * 10 + i % 10] = label + wm_images[label * 10 + i % 10, :, :] = img.numpy() + + fig = plt.figure(figsize=(120, 120)) + for i in range(100): + plt.subplot(10, 10, i + 1) + plt.imshow(wm_images[i], interpolation="none") + plt.title("Label: {}".format(wm_labels[i]), fontsize=80) + +# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 13 +from copy import deepcopy + +from openfl.experimental.workflow.interface import FLSpec, Aggregator, Collaborator +from openfl.experimental.workflow.runtime import LocalRuntime +from openfl.experimental.workflow.placement import aggregator, collaborator +from openfl.experimental.workflow.utilities.ui import InspectFlow + + +def FedAvg(agg_model, models, weights=None): + state_dicts = [model.state_dict() for model in models] + state_dict = agg_model.state_dict() + for key in models[0].state_dict(): + state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in state_dicts], + axis=0, + weights=weights)) + + agg_model.load_state_dict(state_dict) + return agg_model + +# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 15 +class FederatedFlow_MNIST_Watermarking(FLSpec): + """ + This Flow demonstrates Watermarking on a Deep Learning Model in Federated Learning + Ref: WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) + """ + + def __init__( + self, + model=None, + optimizer=None, + watermark_pretrain_optimizer=None, + watermark_retrain_optimizer=None, + round_number=0, + **kwargs, + ): + super().__init__(**kwargs) + + if model is not None: + self.model = model + 
self.optimizer = optimizer + self.watermark_pretrain_optimizer = watermark_pretrain_optimizer + self.watermark_retrain_optimizer = watermark_retrain_optimizer + else: + self.model = Net() + self.optimizer = optim.SGD( + self.model.parameters(), lr=learning_rate, momentum=momentum + ) + self.watermark_pretrain_optimizer = optim.SGD( + self.model.parameters(), + lr=watermark_pretrain_learning_rate, + momentum=watermark_pretrain_momentum, + weight_decay=watermark_pretrain_weight_decay, + ) + self.watermark_retrain_optimizer = optim.SGD( + self.model.parameters(), lr=watermark_retrain_learning_rate + ) + self.round_number = round_number + self.watermark_pretraining_completed = False + + @aggregator + def start(self): + """ + This is the start of the Flow. + """ + + print(f": Start of flow ... ") + self.collaborators = self.runtime.collaborators + + # Randomly select a fraction of actual collaborator every round + fraction = 0.5 + if int(fraction * len(self.collaborators)) < 1: + raise Exception( + f"Cannot run training with {fraction*100}% selected collaborators out of {len(self.collaborators)} Collaborators. Atleast one collaborator is required to run the training" + ) + self.subset_collaborators = random.sample( + self.collaborators, int(fraction * (len(self.collaborators))) + ) + + self.next(self.watermark_pretrain) + + @aggregator + def watermark_pretrain(self): + """ + Pre-Train the Model before starting Federated Learning. 
+ """ + if not self.watermark_pretraining_completed: + + print(": Performing Watermark Pre-training") + + for i in range(self.pretrain_epochs): + + watermark_pretrain_loss = train_model( + self.model, + self.watermark_pretrain_optimizer, + self.watermark_data_loader, + ":", + i, + log=False, + ) + watermark_pretrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + + print( + ": Watermark Pretraining: Round: {:<3} Loss: {:<.6f} Acc: {:<.6f}".format( + i, + watermark_pretrain_loss, + watermark_pretrain_validation_score, + ) + ) + + self.watermark_pretraining_completed = True + + self.next( + self.aggregated_model_validation, + foreach="subset_collaborators", + exclude=["watermark_pretrain_optimizer", "watermark_retrain_optimizer"], + ) + + @collaborator + def aggregated_model_validation(self): + """ + Perform Aggregated Model validation on Collaborators. + """ + self.agg_validation_score = inference(self.model, self.test_loader) + print( + f" Aggregated Model validation score = {self.agg_validation_score}" + ) + + self.next(self.train) + + @collaborator + def train(self): + """ + Train model on Local collab dataset. + + """ + print(": Performing Model Training on Local dataset ... ") + + self.optimizer = optim.SGD( + self.model.parameters(), lr=learning_rate, momentum=momentum + ) + + self.loss = train_model( + self.model, + self.optimizer, + self.train_loader, + ""), + self.round_number if self.round_number is not None else 0, + log=True, + ) + + self.next(self.local_model_validation) + + @collaborator + def local_model_validation(self): + """ + Validate locally trained model. + + """ + self.local_validation_score = inference(self.model, self.test_loader) + print( + f" Local model validation score = {self.local_validation_score}" + ) + self.next(self.join) + + @aggregator + def join(self, inputs): + """ + Model aggregation step. 
+ """ + + self.average_loss = sum(input.loss for input in inputs) / len(inputs) + self.aggregated_model_accuracy = sum( + input.agg_validation_score for input in inputs + ) / len(inputs) + self.local_model_accuracy = sum( + input.local_validation_score for input in inputs + ) / len(inputs) + + print(f": Joining models from collaborators...") + + print( + f" Aggregated model validation score = {self.aggregated_model_accuracy}" + ) + print(f" Average training loss = {self.average_loss}") + print(f" Average local model validation values = {self.local_model_accuracy}") + + self.model = FedAvg(self.model, [input.model for input in inputs]) + + self.next(self.watermark_retrain) + + @aggregator + def watermark_retrain(self): + """ + Retrain the aggregated model. + + """ + print(": Performing Watermark Retraining ... ") + self.watermark_retrain_optimizer = optim.SGD( + self.model.parameters(), lr=watermark_retrain_learning_rate + ) + + retrain_round = 0 + + # Perform re-training until (accuracy >= acc_threshold) or (retrain_round > number of retrain_epochs) + self.watermark_retrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + while ( + self.watermark_retrain_validation_score < self.watermark_acc_threshold + ) and (retrain_round < self.retrain_epochs): + self.watermark_retrain_train_loss = train_model( + self.model, + self.watermark_retrain_optimizer, + self.watermark_data_loader, + "", + retrain_round, + log=False, + ) + self.watermark_retrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + + print( + ": Watermark Retraining: Train Epoch: {:<3} Retrain Round: {:<3} Loss: {:<.6f}, Acc: {:<.6f}".format( + self.round_number, + retrain_round, + self.watermark_retrain_train_loss, + self.watermark_retrain_validation_score, + ) + ) + + retrain_round += 1 + + self.next(self.end) + + @aggregator + def end(self): + """ + This is the last step in the Flow. 
+ + """ + print(f"This is the end of the flow") + +# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 17 +# Set random seed +random_seed = 42 +torch.manual_seed(random_seed) +np.random.seed(random_seed) +torch.backends.cudnn.enabled = False + +# Batch sizes +batch_size_train = 64 +batch_size_test = 64 +batch_size_watermark = 50 + +# MNIST parameters +learning_rate = 5e-2 +momentum = 5e-1 +log_interval = 20 + +# Watermarking parameters +watermark_pretrain_learning_rate = 1e-1 +watermark_pretrain_momentum = 5e-1 +watermark_pretrain_weight_decay = 5e-05 +watermark_retrain_learning_rate = 5e-3 + +# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 19 +def callable_to_initialize_aggregator_private_attributes(watermark_data, batch_size): + return { + "watermark_data_loader": torch.utils.data.DataLoader( + watermark_data, batch_size=batch_size, shuffle=True + ), + "pretrain_epochs": 25, + "retrain_epochs": 25, + "watermark_acc_threshold": 0.98, + } + +# Setup Aggregator private attributes via callable function +aggregator = Aggregator( + name="agg", + private_attributes_callable=callable_to_initialize_aggregator_private_attributes, + watermark_data=watermark_data, + batch_size=batch_size_watermark, + ) + +collaborator_names = [ + "Portland", + "Seattle", + "Chandler", + "Bangalore", + "New Delhi", +] + +def callable_to_initialize_collaborator_private_attributes(index, n_collaborators, batch_size, train_dataset, test_dataset): + train = deepcopy(train_dataset) + test = deepcopy(test_dataset) + train.data = train_dataset.data[index::n_collaborators] + train.targets = train_dataset.targets[index::n_collaborators] + test.data = test_dataset.data[index::n_collaborators] + test.targets = test_dataset.targets[index::n_collaborators] + + return { + "train_loader": torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True), + "test_loader": 
torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True), + } + +# Setup Collaborators private attributes via callable function +collaborators = [] +for idx, collaborator_name in enumerate(collaborator_names): + collaborators.append( + Collaborator( + name=collaborator_name, num_cpus=0, num_gpus=0, + private_attributes_callable=callable_to_initialize_collaborator_private_attributes, + index=idx, n_collaborators=len(collaborator_names), + train_dataset=mnist_train, test_dataset=mnist_test, batch_size=64 + ) + ) + +local_runtime = LocalRuntime(aggregator=aggregator, collaborators=collaborators, backend="single_process") +print(f"Local runtime collaborators = {local_runtime.collaborators}") + +# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 21 +model = Net() +optimizer = optim.SGD( + model.parameters(), lr=learning_rate, momentum=momentum +) +watermark_pretrain_optimizer = optim.SGD( + model.parameters(), + lr=watermark_pretrain_learning_rate, + momentum=watermark_pretrain_momentum, + weight_decay=watermark_pretrain_weight_decay, +) +watermark_retrain_optimizer = optim.SGD( + model.parameters(), lr=watermark_retrain_learning_rate +) +best_model = None +round_number = 0 +top_model_accuracy = 0 + +flflow = FederatedFlow_MNIST_Watermarking( + model, + optimizer, + watermark_pretrain_optimizer, + watermark_retrain_optimizer, + round_number, + checkpoint=True, +) +flflow.runtime = local_runtime +for i in range(1): + print(f"Starting round {i}...") +# flflow.run() + flflow.round_number += 1 + if hasattr(flflow, "aggregated_model_accuracy"): + aggregated_model_accuracy = flflow.aggregated_model_accuracy + if aggregated_model_accuracy > top_model_accuracy: + print( + f"\nAccuracy improved to {aggregated_model_accuracy} for round {i}, Watermark Acc: {flflow.watermark_retrain_validation_score}\n" + ) + top_model_accuracy = aggregated_model_accuracy + best_model = flflow.model + + 
torch.save(best_model.state_dict(), "watermarked_mnist_model.pth") diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_script.py b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_script.py new file mode 100644 index 0000000000..bb4473268c --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_script.py @@ -0,0 +1,73 @@ +# Copyright (C) 2020-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import shutil +import filecmp +from pathlib import Path +from openfl.experimental.workflow.notebooktools import NotebookTools + +# Define paths +NOTEBOOK_PATH = "testcase_export/301_MNIST_Watermarking.ipynb" +ACTUAL_DIR = "testcase_export/test_artifacts/actual" +EXPECTED_DIR = "testcase_export/test_artifacts/expected" + +def setup_workspace(): + """Setup function to create the actual workspace for testing.""" + # Ensure the actual directory is empty + if Path(ACTUAL_DIR).exists(): + shutil.rmtree(ACTUAL_DIR) + Path(ACTUAL_DIR).mkdir(parents=True, exist_ok=True) + + # Generate workspace using NotebookTools + NotebookTools.export( + notebook_path=NOTEBOOK_PATH, + output_workspace=ACTUAL_DIR + ) + +def compare_files(file1, file2): + """Compare the content of two files, ignoring commented lines.""" + with open(file1, "r") as f1, open(file2, "r") as f2: + lines1 = f1.readlines() + lines2 = f2.readlines() + + # Remove comment lines (lines starting with '#') + lines1 = [line for line in lines1 if not line.startswith("#")] + lines2 = [line for line in lines2 if not line.startswith("#")] + + return lines1 == lines2 + +def compare_directories(dir1, dir2): + """Compare two directories recursively, including file content.""" + comparison = filecmp.dircmp(dir1, dir2) + # Check for differences in file names or structure + if comparison.left_only or comparison.right_only: + return False + + # Compare subdirectories + for subdir in comparison.common_dirs: + if not
compare_directories(Path(dir1) / subdir, Path(dir2) / subdir): + return False + + # Compare file content for all common files + for file in comparison.common_files: + file1 = Path(dir1) / file + file2 = Path(dir2) / file + print(f"Comparing files: {file1} and {file2}") + if not compare_files(file1, file2): + return False + + return True + +def test_export_functionality(): + """ + Test that the workspace generated by NotebookTools matches the Expected Artifacts. + + This function compares the contents of the actual directory generated by + NotebookTools with the expected directory. + """ + # Compare the expected and actual directories + assert compare_directories(EXPECTED_DIR, ACTUAL_DIR), ( + "The workspace generated by NotebookTools does not match the expected. " + "Check the differences in the test_artifacts/expected and test_artifacts/actual folders." + ) + diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/MNIST_Watermarking.ipynb b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/MNIST_Watermarking.ipynb new file mode 100644 index 0000000000..0ee4c67681 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/MNIST_Watermarking.ipynb @@ -0,0 +1,587 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# Federated Runtime: 301_MNIST_Watermarking" + ] + }, + { + "cell_type": "markdown", + "id": "3b7357ef", + "metadata": {}, + "source": [ + "This tutorial is based on the LocalRuntime example [301_MNIST_Watermarking](https://github.com/securefederatedai/openfl/blob/develop/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb). It has been adapted to demonstrate the FederatedRuntime version of the watermarking workflow. 
In this tutorial, we will guide you through the process of deploying the watermarking example within a federation, showcasing how to transition from a local setup to a federated environment effectively." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a4394089", + "metadata": {}, + "source": [ + "# Getting Started" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "857f9995", + "metadata": {}, + "source": [ + "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. \n", + "\n", + "In the following cell, `#| default_exp experiment` indicates that the exported file will be named 'experiment'. This name can be modified based on the user's requirements and preferences" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d79eacbd", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "markdown", + "id": "62449b5f", + "metadata": {}, + "source": [ + "Once we have specified the name of the module, subsequent cells of the notebook need to be *prefixed* with the `#| export` directive as shown below. 
User should ensure that *all* the notebook functionality required in the Federated Learning experiment is included in this directive" + ] + }, + { + "cell_type": "markdown", + "id": "2e19dcf2", + "metadata": {}, + "source": [ + "We start by installing OpenFL and dependencies of the workflow interface \n", + "> These dependencies are required to be exported and become the requirements for the Federated Learning Workspace " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f7475cba", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "!pip install git+https://github.com/securefederatedai/openfl.git\n", + "!pip install -r ../../../workflow_interface_requirements.txt\n", + "!pip install matplotlib\n", + "!pip install torch==2.3.1\n", + "!pip install torchvision==0.18.1\n", + "!pip install git+https://github.com/pyviz-topics/imagen.git@master\n", + "!pip install holoviews==1.15.4\n", + "!pip install -U ipywidgets" + ] + }, + { + "cell_type": "markdown", + "id": "9a6ae8e2", + "metadata": {}, + "source": [ + "We now define our model, optimizer, and some helper functions like we would for any other deep learning experiment \n", + "\n", + "> This cell and all the subsequent cells are important ingredients of the Federated Learning experiment and therefore annotated with the `#| export` directive" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9bd8ac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "import torch\n", + "import numpy as np\n", + "\n", + "random_seed = 1\n", + "torch.backends.cudnn.enabled = False\n", + "torch.manual_seed(random_seed)\n", + "\n", + "class Net(nn.Module):\n", + " def __init__(self, dropout=0.0):\n", + " super(Net, self).__init__()\n", + " self.dropout = dropout\n", + " self.block = nn.Sequential(\n", + " nn.Conv2d(1, 32, 2),\n", + " 
nn.MaxPool2d(2),\n", + " nn.ReLU(),\n", + " nn.Conv2d(32, 64, 2),\n", + " nn.MaxPool2d(2),\n", + " nn.ReLU(),\n", + " nn.Conv2d(64, 128, 2),\n", + " nn.ReLU(),\n", + " )\n", + " self.fc1 = nn.Linear(128 * 5**2, 200)\n", + " self.fc2 = nn.Linear(200, 10)\n", + " self.relu = nn.ReLU()\n", + " self.dropout = nn.Dropout(p=dropout)\n", + "\n", + " def forward(self, x):\n", + " x = self.dropout(x)\n", + " out = self.block(x)\n", + " out = out.view(-1, 128 * 5**2)\n", + " out = self.dropout(out)\n", + " out = self.relu(self.fc1(out))\n", + " out = self.dropout(out)\n", + " out = self.fc2(out)\n", + " return F.log_softmax(out, 1)\n", + "\n", + "\n", + "def inference(network, test_loader):\n", + " network.eval()\n", + " correct = 0\n", + " with torch.no_grad():\n", + " for data, target in test_loader:\n", + " output = network(data)\n", + " pred = output.data.max(1, keepdim=True)[1]\n", + " correct += pred.eq(target.data.view_as(pred)).sum()\n", + " accuracy = float(correct / len(test_loader.dataset))\n", + " return accuracy\n", + "\n", + "\n", + "def train_model(model, optimizer, data_loader, entity, round_number, log=False):\n", + " # Helper function to train the model\n", + " train_loss = 0\n", + " log_interval = 20\n", + " model.train()\n", + " for batch_idx, (X, y) in enumerate(data_loader):\n", + " optimizer.zero_grad()\n", + "\n", + " output = model(X)\n", + " loss = F.nll_loss(output, y)\n", + " loss.backward()\n", + "\n", + " optimizer.step()\n", + "\n", + " train_loss += loss.item() * len(X)\n", + " if batch_idx % log_interval == 0 and log:\n", + " print(\"{:<20} Train Epoch: {:<3} [{:<3}/{:<4} ({:<.0f}%)] Loss: {:<.6f}\".format(\n", + " entity,\n", + " round_number,\n", + " batch_idx * len(X),\n", + " len(data_loader.dataset),\n", + " 100.0 * batch_idx / len(data_loader),\n", + " loss.item(),\n", + " )\n", + " )\n", + " train_loss /= len(data_loader.dataset)\n", + " return train_loss" + ] + }, + { + "cell_type": "markdown", + "id": "d0849d57", + "metadata": {}, + 
"source": [ + "Next we import the `FLSpec` & placement decorators (`aggregator/collaborator`)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "89cf4866", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.interface import FLSpec\n", + "from openfl.experimental.workflow.placement import aggregator, collaborator\n", + "\n", + "def FedAvg(agg_model, models, weights=None):\n", + " state_dicts = [model.state_dict() for model in models]\n", + " state_dict = agg_model.state_dict()\n", + " for key in models[0].state_dict():\n", + " state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in state_dicts],\n", + " axis=0, \n", + " weights=weights))\n", + " \n", + " agg_model.load_state_dict(state_dict)\n", + " return agg_model" + ] + }, + { + "cell_type": "markdown", + "id": "36ed5e31", + "metadata": {}, + "source": [ + "Let us now define the Workflow for Watermark embedding." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c4a752", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class FederatedFlow_MNIST_Watermarking(FLSpec):\n", + " \"\"\"\n", + " This Flow demonstrates Watermarking on a Deep Learning Model in Federated Learning\n", + " Ref: WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298)\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " model=None,\n", + " optimizer=None,\n", + " watermark_pretrain_optimizer=None,\n", + " watermark_retrain_optimizer=None,\n", + " round_number=0,\n", + " n_rounds=3,\n", + " **kwargs,\n", + " ):\n", + " super().__init__(**kwargs)\n", + "\n", + " if model is not None:\n", + " self.model = model\n", + " self.optimizer = optimizer\n", + " self.watermark_pretrain_optimizer = watermark_pretrain_optimizer\n", + " self.watermark_retrain_optimizer = watermark_retrain_optimizer\n", + " else:\n", + " self.model = Net()\n", + " self.optimizer = 
optim.SGD(\n", + " self.model.parameters(), lr=learning_rate, momentum=momentum\n", + " )\n", + " self.watermark_pretrain_optimizer = optim.SGD(\n", + " self.model.parameters(),\n", + " lr=watermark_pretrain_learning_rate,\n", + " momentum=watermark_pretrain_momentum,\n", + " weight_decay=watermark_pretrain_weight_decay,\n", + " )\n", + " self.watermark_retrain_optimizer = optim.SGD(\n", + " self.model.parameters(), lr=watermark_retrain_learning_rate\n", + " )\n", + " self.round_number = round_number\n", + " self.n_rounds = n_rounds\n", + " self.watermark_pretraining_completed = False\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " \"\"\"\n", + " This is the start of the Flow.\n", + " \"\"\"\n", + " print(\": Start of flow ... \")\n", + " self.collaborators = self.runtime.collaborators\n", + "\n", + " self.next(self.watermark_pretrain)\n", + "\n", + " @aggregator\n", + " def watermark_pretrain(self):\n", + " \"\"\"\n", + " Pre-Train the Model before starting Federated Learning.\n", + " \"\"\"\n", + " if not self.watermark_pretraining_completed:\n", + "\n", + " print(\": Performing Watermark Pre-training\")\n", + "\n", + " for i in range(self.pretrain_epochs):\n", + "\n", + " watermark_pretrain_loss = train_model(\n", + " self.model,\n", + " self.watermark_pretrain_optimizer,\n", + " self.watermark_data_loader,\n", + " \":\",\n", + " i,\n", + " log=False,\n", + " )\n", + " watermark_pretrain_validation_score = inference(\n", + " self.model, self.watermark_data_loader\n", + " )\n", + "\n", + " print(f\": Watermark Pretraining: Round: {i:<3}\"\n", + " + f\" Loss: {watermark_pretrain_loss:<.6f}\"\n", + " + f\" Acc: {watermark_pretrain_validation_score:<.6f}\")\n", + "\n", + " self.watermark_pretraining_completed = True\n", + "\n", + " self.next(\n", + " self.aggregated_model_validation,\n", + " foreach=\"collaborators\",\n", + " )\n", + "\n", + " @collaborator\n", + " def aggregated_model_validation(self):\n", + " \"\"\"\n", + " Perform Aggregated Model 
validation on Collaborators.\n", + " \"\"\"\n", + " self.agg_validation_score = inference(self.model, self.test_loader)\n", + " print(f\"\"\n", + " + f\" Aggregated Model validation score = {self.agg_validation_score}\"\n", + " )\n", + "\n", + " self.next(self.train)\n", + "\n", + " @collaborator\n", + " def train(self):\n", + " \"\"\"\n", + " Train model on Local collab dataset.\n", + " \"\"\"\n", + " print(\": Performing Model Training on Local dataset ... \")\n", + "\n", + " self.optimizer = optim.SGD(\n", + " self.model.parameters(), lr=learning_rate, momentum=momentum\n", + " )\n", + "\n", + " self.loss = train_model(\n", + " self.model,\n", + " self.optimizer,\n", + " self.train_loader,\n", + " f\"\",\n", + " self.round_number,\n", + " log=True,\n", + " )\n", + "\n", + " self.next(self.local_model_validation)\n", + "\n", + " @collaborator\n", + " def local_model_validation(self):\n", + " \"\"\"\n", + " Validate locally trained model.\n", + " \"\"\"\n", + " self.local_validation_score = inference(self.model, self.test_loader)\n", + " print(\n", + " f\" Local model validation score = {self.local_validation_score}\"\n", + " )\n", + " self.next(self.join)\n", + "\n", + " @aggregator\n", + " def join(self, inputs):\n", + " \"\"\"\n", + " Model aggregation step.\n", + " \"\"\"\n", + " self.average_loss = sum(input.loss for input in inputs) / len(inputs)\n", + " self.aggregated_model_accuracy = sum(\n", + " input.agg_validation_score for input in inputs\n", + " ) / len(inputs)\n", + " self.local_model_accuracy = sum(\n", + " input.local_validation_score for input in inputs\n", + " ) / len(inputs)\n", + "\n", + " print(\": Joining models from collaborators...\")\n", + "\n", + " print(\n", + " f\" Aggregated model validation score = {self.aggregated_model_accuracy}\"\n", + " )\n", + " print(f\" Average training loss = {self.average_loss}\")\n", + " print(f\" Average local model validation values = {self.local_model_accuracy}\")\n", + "\n", + " self.model = 
FedAvg(self.model, [input.model for input in inputs])\n", + "\n", + " self.next(self.watermark_retrain)\n", + "\n", + " @aggregator\n", + " def watermark_retrain(self):\n", + " \"\"\"\n", + " Retrain the aggregated model.\n", + " \"\"\"\n", + " print(\": Performing Watermark Retraining ... \")\n", + " self.watermark_retrain_optimizer = optim.SGD(\n", + " self.model.parameters(), lr=watermark_retrain_learning_rate\n", + " )\n", + "\n", + " retrain_round = 0\n", + "\n", + " # Perform re-training until (accuracy >= acc_threshold) or\n", + " # (retrain_round > number of retrain_epochs)\n", + " self.watermark_retrain_validation_score = inference(\n", + " self.model, self.watermark_data_loader\n", + " )\n", + " while (\n", + " self.watermark_retrain_validation_score < self.watermark_acc_threshold\n", + " ) and (retrain_round < self.retrain_epochs):\n", + " self.watermark_retrain_train_loss = train_model(\n", + " self.model,\n", + " self.watermark_retrain_optimizer,\n", + " self.watermark_data_loader,\n", + " \"\",\n", + " retrain_round,\n", + " log=False,\n", + " )\n", + " self.watermark_retrain_validation_score = inference(\n", + " self.model, self.watermark_data_loader\n", + " )\n", + "\n", + " print(f\": Watermark Retraining: Train Epoch: {self.round_number:<3}\"\n", + " + f\" Retrain Round: {retrain_round:<3}\"\n", + " + f\" Loss: {self.watermark_retrain_train_loss:<.6f},\"\n", + " + f\" Acc: {self.watermark_retrain_validation_score:<.6f}\")\n", + " retrain_round += 1\n", + "\n", + " self.next(self.internal_loop)\n", + " \n", + " @aggregator\n", + " def internal_loop(self):\n", + " \"\"\"\n", + " Internal loop to continue the Federated Learning process.\n", + " \"\"\"\n", + " if self.round_number == self.n_rounds - 1:\n", + " print(f\"\\nCompleted training for all {self.n_rounds} round(s)\")\n", + " self.next(self.end)\n", + " else:\n", + " self.round_number += 1\n", + " print(f\"\\nCompleted round: {self.round_number}\")\n", + " 
self.next(self.aggregated_model_validation, foreach='collaborators')\n", + "\n", + " @aggregator\n", + " def end(self):\n", + " \"\"\"\n", + " This is the last step in the Flow.\n", + " \"\"\"\n", + " print(\"This is the end of the flow\")" + ] + }, + { + "cell_type": "markdown", + "id": "b5371b6d", + "metadata": {}, + "source": [ + "## Defining and Initializing the Federated Runtime\n", + "We initialize the Federated Runtime by providing:\n", + "- `director_info`: The director's connection information \n", + "- `authorized_collaborators`: A list of authorized collaborators\n", + "- `notebook_path`: Path to this Jupyter notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1715a373", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.runtime import FederatedRuntime\n", + "\n", + "director_info = {\n", + " 'director_node_fqdn':'localhost',\n", + " 'director_port':50050,\n", + "}\n", + "\n", + "authorized_collaborators = ['Bangalore', 'Chandler']\n", + "\n", + "federated_runtime = FederatedRuntime(\n", + " collaborators=authorized_collaborators,\n", + " director=director_info, \n", + " notebook_path='./MNIST_Watermarking.ipynb',\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "6de9684f", + "metadata": {}, + "source": [ + "The status of the connected Envoys can be checked using the `get_envoys()` method of the `federated_runtime`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f1be87f", + "metadata": {}, + "outputs": [], + "source": [ + "federated_runtime.get_envoys()" + ] + }, + { + "cell_type": "markdown", + "id": "0eaeca25", + "metadata": {}, + "source": [ + "With the federated_runtime now instantiated, we will proceed to deploy the watermarking workspace and run the experiment!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "# Set random seed\n", + "random_seed = 42\n", + "torch.manual_seed(random_seed)\n", + "np.random.seed(random_seed)\n", + "torch.backends.cudnn.enabled = False\n", + "\n", + "# MNIST parameters\n", + "learning_rate = 5e-2\n", + "momentum = 5e-1\n", + "log_interval = 20\n", + "\n", + "# Watermarking parameters\n", + "watermark_pretrain_learning_rate = 1e-1\n", + "watermark_pretrain_momentum = 5e-1\n", + "watermark_pretrain_weight_decay = 5e-05\n", + "watermark_retrain_learning_rate = 5e-3\n", + "\n", + "model = Net()\n", + "optimizer = optim.SGD(\n", + " model.parameters(), lr=learning_rate, momentum=momentum\n", + ")\n", + "watermark_pretrain_optimizer = optim.SGD(\n", + " model.parameters(),\n", + " lr=watermark_pretrain_learning_rate,\n", + " momentum=watermark_pretrain_momentum,\n", + " weight_decay=watermark_pretrain_weight_decay,\n", + ")\n", + "watermark_retrain_optimizer = optim.SGD(\n", + " model.parameters(), lr=watermark_retrain_learning_rate\n", + ")\n", + "\n", + "flflow = FederatedFlow_MNIST_Watermarking(\n", + " model,\n", + " optimizer,\n", + " watermark_pretrain_optimizer,\n", + " watermark_retrain_optimizer,\n", + " checkpoint=True,\n", + ")\n", + "flflow.runtime = federated_runtime\n", + "flflow.run()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/.workspace 
b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/.workspace new file mode 100644 index 0000000000..3c2c5d08b4 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/.workspace @@ -0,0 +1,2 @@ +current_plan_name: default + diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/defaults b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/defaults new file mode 100644 index 0000000000..fb82f9c5b6 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/defaults @@ -0,0 +1,2 @@ +../../workspace/plan/defaults + diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/plan.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/plan.yaml new file mode 100644 index 0000000000..f29bada0f1 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/plan.yaml @@ -0,0 +1,25 @@ +aggregator: + defaults: plan/defaults/aggregator.yaml + settings: + rounds_to_train: 1 + template: openfl.experimental.workflow.component.Aggregator +collaborator: + defaults: plan/defaults/collaborator.yaml + settings: {} + template: openfl.experimental.workflow.component.Collaborator +federated_flow: + settings: + checkpoint: true + model: src.experiment.model + optimizer: src.experiment.optimizer + watermark_pretrain_optimizer: src.experiment.watermark_pretrain_optimizer + watermark_retrain_optimizer: src.experiment.watermark_retrain_optimizer + template: src.experiment.FederatedFlow_MNIST_Watermarking +network: + settings: + agg_addr: localhost + agg_port: 53798 + client_reconnect_interval: 5 + disable_client_auth: false + tls: false + template: 
openfl.federation.Network diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/requirements.txt b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/requirements.txt new file mode 100644 index 0000000000..2a7f08eab8 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/requirements.txt @@ -0,0 +1,7 @@ +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability +matplotlib +torch==2.3.1 +torchvision==0.18.1 +git+https://github.com/pyviz-topics/imagen.git@master +holoviews==1.15.4 +ipywidgets diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/__init__.py b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/__init__.py new file mode 100644 index 0000000000..49883934a8 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2020-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/experiment.py b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/experiment.py new file mode 100644 index 0000000000..3ac90ade4d --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/experiment.py @@ -0,0 +1,380 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../../MNIST_Watermarking.ipynb. 
+ +# %% auto 0 +__all__ = ['random_seed', 'director_info', 'authorized_collaborators', 'federated_runtime', 'learning_rate', 'momentum', + 'log_interval', 'watermark_pretrain_learning_rate', 'watermark_pretrain_momentum', + 'watermark_pretrain_weight_decay', 'watermark_retrain_learning_rate', 'model', 'optimizer', + 'watermark_pretrain_optimizer', 'watermark_retrain_optimizer', 'flflow', 'Net', 'inference', 'train_model', + 'FedAvg', 'FederatedFlow_MNIST_Watermarking'] + +# %% ../../../MNIST_Watermarking.ipynb 7 + +# %% ../../../MNIST_Watermarking.ipynb 9 +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torch +import numpy as np + +random_seed = 1 +torch.backends.cudnn.enabled = False +torch.manual_seed(random_seed) + +class Net(nn.Module): + def __init__(self, dropout=0.0): + super(Net, self).__init__() + self.dropout = dropout + self.block = nn.Sequential( + nn.Conv2d(1, 32, 2), + nn.MaxPool2d(2), + nn.ReLU(), + nn.Conv2d(32, 64, 2), + nn.MaxPool2d(2), + nn.ReLU(), + nn.Conv2d(64, 128, 2), + nn.ReLU(), + ) + self.fc1 = nn.Linear(128 * 5**2, 200) + self.fc2 = nn.Linear(200, 10) + self.relu = nn.ReLU() + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x): + x = self.dropout(x) + out = self.block(x) + out = out.view(-1, 128 * 5**2) + out = self.dropout(out) + out = self.relu(self.fc1(out)) + out = self.dropout(out) + out = self.fc2(out) + return F.log_softmax(out, 1) + + +def inference(network, test_loader): + network.eval() + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + output = network(data) + pred = output.data.max(1, keepdim=True)[1] + correct += pred.eq(target.data.view_as(pred)).sum() + accuracy = float(correct / len(test_loader.dataset)) + return accuracy + + +def train_model(model, optimizer, data_loader, entity, round_number, log=False): + # Helper function to train the model + train_loss = 0 + log_interval = 20 + model.train() + for batch_idx, (X, y) in 
enumerate(data_loader): + optimizer.zero_grad() + + output = model(X) + loss = F.nll_loss(output, y) + loss.backward() + + optimizer.step() + + train_loss += loss.item() * len(X) + if batch_idx % log_interval == 0 and log: + print("{:<20} Train Epoch: {:<3} [{:<3}/{:<4} ({:<.0f}%)] Loss: {:<.6f}".format( + entity, + round_number, + batch_idx * len(X), + len(data_loader.dataset), + 100.0 * batch_idx / len(data_loader), + loss.item(), + ) + ) + train_loss /= len(data_loader.dataset) + return train_loss + +# %% ../../../MNIST_Watermarking.ipynb 11 +from openfl.experimental.workflow.interface import FLSpec +from openfl.experimental.workflow.placement import aggregator, collaborator + +def FedAvg(agg_model, models, weights=None): + state_dicts = [model.state_dict() for model in models] + state_dict = agg_model.state_dict() + for key in models[0].state_dict(): + state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in state_dicts], + axis=0, + weights=weights)) + + agg_model.load_state_dict(state_dict) + return agg_model + +# %% ../../../MNIST_Watermarking.ipynb 13 +class FederatedFlow_MNIST_Watermarking(FLSpec): + """ + This Flow demonstrates Watermarking on a Deep Learning Model in Federated Learning + Ref: WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) + """ + + def __init__( + self, + model=None, + optimizer=None, + watermark_pretrain_optimizer=None, + watermark_retrain_optimizer=None, + round_number=0, + n_rounds=3, + **kwargs, + ): + super().__init__(**kwargs) + + if model is not None: + self.model = model + self.optimizer = optimizer + self.watermark_pretrain_optimizer = watermark_pretrain_optimizer + self.watermark_retrain_optimizer = watermark_retrain_optimizer + else: + self.model = Net() + self.optimizer = optim.SGD( + self.model.parameters(), lr=learning_rate, momentum=momentum + ) + self.watermark_pretrain_optimizer = optim.SGD( + self.model.parameters(), + lr=watermark_pretrain_learning_rate, + 
momentum=watermark_pretrain_momentum, + weight_decay=watermark_pretrain_weight_decay, + ) + self.watermark_retrain_optimizer = optim.SGD( + self.model.parameters(), lr=watermark_retrain_learning_rate + ) + self.round_number = round_number + self.n_rounds = n_rounds + self.watermark_pretraining_completed = False + + @aggregator + def start(self): + """ + This is the start of the Flow. + """ + print(": Start of flow ... ") + self.collaborators = self.runtime.collaborators + + self.next(self.watermark_pretrain) + + @aggregator + def watermark_pretrain(self): + """ + Pre-Train the Model before starting Federated Learning. + """ + if not self.watermark_pretraining_completed: + + print(": Performing Watermark Pre-training") + + for i in range(self.pretrain_epochs): + + watermark_pretrain_loss = train_model( + self.model, + self.watermark_pretrain_optimizer, + self.watermark_data_loader, + ":", + i, + log=False, + ) + watermark_pretrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + + print(f": Watermark Pretraining: Round: {i:<3}" + + f" Loss: {watermark_pretrain_loss:<.6f}" + + f" Acc: {watermark_pretrain_validation_score:<.6f}") + + self.watermark_pretraining_completed = True + + self.next( + self.aggregated_model_validation, + foreach="collaborators", + ) + + @collaborator + def aggregated_model_validation(self): + """ + Perform Aggregated Model validation on Collaborators. + """ + self.agg_validation_score = inference(self.model, self.test_loader) + print(f"" + + f" Aggregated Model validation score = {self.agg_validation_score}" + ) + + self.next(self.train) + + @collaborator + def train(self): + """ + Train model on Local collab dataset. + """ + print(": Performing Model Training on Local dataset ... 
") + + self.optimizer = optim.SGD( + self.model.parameters(), lr=learning_rate, momentum=momentum + ) + + self.loss = train_model( + self.model, + self.optimizer, + self.train_loader, + f"", + self.round_number, + log=True, + ) + + self.next(self.local_model_validation) + + @collaborator + def local_model_validation(self): + """ + Validate locally trained model. + """ + self.local_validation_score = inference(self.model, self.test_loader) + print( + f" Local model validation score = {self.local_validation_score}" + ) + self.next(self.join) + + @aggregator + def join(self, inputs): + """ + Model aggregation step. + """ + self.average_loss = sum(input.loss for input in inputs) / len(inputs) + self.aggregated_model_accuracy = sum( + input.agg_validation_score for input in inputs + ) / len(inputs) + self.local_model_accuracy = sum( + input.local_validation_score for input in inputs + ) / len(inputs) + + print(": Joining models from collaborators...") + + print( + f" Aggregated model validation score = {self.aggregated_model_accuracy}" + ) + print(f" Average training loss = {self.average_loss}") + print(f" Average local model validation values = {self.local_model_accuracy}") + + self.model = FedAvg(self.model, [input.model for input in inputs]) + + self.next(self.watermark_retrain) + + @aggregator + def watermark_retrain(self): + """ + Retrain the aggregated model. + """ + print(": Performing Watermark Retraining ... 
") + self.watermark_retrain_optimizer = optim.SGD( + self.model.parameters(), lr=watermark_retrain_learning_rate + ) + + retrain_round = 0 + + # Perform re-training until (accuracy >= acc_threshold) or + # (retrain_round > number of retrain_epochs) + self.watermark_retrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + while ( + self.watermark_retrain_validation_score < self.watermark_acc_threshold + ) and (retrain_round < self.retrain_epochs): + self.watermark_retrain_train_loss = train_model( + self.model, + self.watermark_retrain_optimizer, + self.watermark_data_loader, + "", + retrain_round, + log=False, + ) + self.watermark_retrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + + print(f": Watermark Retraining: Train Epoch: {self.round_number:<3}" + + f" Retrain Round: {retrain_round:<3}" + + f" Loss: {self.watermark_retrain_train_loss:<.6f}," + + f" Acc: {self.watermark_retrain_validation_score:<.6f}") + retrain_round += 1 + + self.next(self.internal_loop) + + @aggregator + def internal_loop(self): + """ + Internal loop to continue the Federated Learning process. + """ + if self.round_number == self.n_rounds - 1: + print(f"\nCompleted training for all {self.n_rounds} round(s)") + self.next(self.end) + else: + self.round_number += 1 + print(f"\nCompleted round: {self.round_number}") + self.next(self.aggregated_model_validation, foreach='collaborators') + + @aggregator + def end(self): + """ + This is the last step in the Flow. 
+ """ + print("This is the end of the flow") + +# %% ../../../MNIST_Watermarking.ipynb 15 +from openfl.experimental.workflow.runtime import FederatedRuntime + +director_info = { + 'director_node_fqdn':'localhost', + 'director_port':50050, +} + +authorized_collaborators = ['Bangalore', 'Chandler'] + +federated_runtime = FederatedRuntime( + collaborators=authorized_collaborators, + director=director_info, + notebook_path='./MNIST_Watermarking.ipynb', +) + +# %% ../../../MNIST_Watermarking.ipynb 19 +# Set random seed +random_seed = 42 +torch.manual_seed(random_seed) +np.random.seed(random_seed) +torch.backends.cudnn.enabled = False + +# MNIST parameters +learning_rate = 5e-2 +momentum = 5e-1 +log_interval = 20 + +# Watermarking parameters +watermark_pretrain_learning_rate = 1e-1 +watermark_pretrain_momentum = 5e-1 +watermark_pretrain_weight_decay = 5e-05 +watermark_retrain_learning_rate = 5e-3 + +model = Net() +optimizer = optim.SGD( + model.parameters(), lr=learning_rate, momentum=momentum +) +watermark_pretrain_optimizer = optim.SGD( + model.parameters(), + lr=watermark_pretrain_learning_rate, + momentum=watermark_pretrain_momentum, + weight_decay=watermark_pretrain_weight_decay, +) +watermark_retrain_optimizer = optim.SGD( + model.parameters(), lr=watermark_retrain_learning_rate +) + +flflow = FederatedFlow_MNIST_Watermarking( + model, + optimizer, + watermark_pretrain_optimizer, + watermark_retrain_optimizer, + checkpoint=True, +) +flflow.runtime = federated_runtime +# flflow.run() diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/.workspace b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/.workspace new file mode 100644 index 0000000000..3c2c5d08b4 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/.workspace @@ -0,0 +1,2 @@ +current_plan_name: default + diff --git 
a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/defaults b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/defaults new file mode 100644 index 0000000000..fb82f9c5b6 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/defaults @@ -0,0 +1,2 @@ +../../workspace/plan/defaults + diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/plan.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/plan.yaml new file mode 100644 index 0000000000..f29bada0f1 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/plan.yaml @@ -0,0 +1,25 @@ +aggregator: + defaults: plan/defaults/aggregator.yaml + settings: + rounds_to_train: 1 + template: openfl.experimental.workflow.component.Aggregator +collaborator: + defaults: plan/defaults/collaborator.yaml + settings: {} + template: openfl.experimental.workflow.component.Collaborator +federated_flow: + settings: + checkpoint: true + model: src.experiment.model + optimizer: src.experiment.optimizer + watermark_pretrain_optimizer: src.experiment.watermark_pretrain_optimizer + watermark_retrain_optimizer: src.experiment.watermark_retrain_optimizer + template: src.experiment.FederatedFlow_MNIST_Watermarking +network: + settings: + agg_addr: localhost + agg_port: 53798 + client_reconnect_interval: 5 + disable_client_auth: false + tls: false + template: openfl.federation.Network diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/requirements.txt b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/requirements.txt new file mode 100644 index 0000000000..2a7f08eab8 --- 
/dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/requirements.txt @@ -0,0 +1,7 @@ +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability +matplotlib +torch==2.3.1 +torchvision==0.18.1 +git+https://github.com/pyviz-topics/imagen.git@master +holoviews==1.15.4 +ipywidgets diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/__init__.py b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/__init__.py new file mode 100644 index 0000000000..49883934a8 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2020-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/experiment.py b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/experiment.py new file mode 100644 index 0000000000..bfca717881 --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/experiment.py @@ -0,0 +1,380 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../MNIST_Watermarking.ipynb. 
+ +# %% auto 0 +__all__ = ['random_seed', 'director_info', 'authorized_collaborators', 'federated_runtime', 'learning_rate', 'momentum', + 'log_interval', 'watermark_pretrain_learning_rate', 'watermark_pretrain_momentum', + 'watermark_pretrain_weight_decay', 'watermark_retrain_learning_rate', 'model', 'optimizer', + 'watermark_pretrain_optimizer', 'watermark_retrain_optimizer', 'flflow', 'Net', 'inference', 'train_model', + 'FedAvg', 'FederatedFlow_MNIST_Watermarking'] + +# %% ../../MNIST_Watermarking.ipynb 7 + +# %% ../../MNIST_Watermarking.ipynb 9 +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torch +import numpy as np + +random_seed = 1 +torch.backends.cudnn.enabled = False +torch.manual_seed(random_seed) + +class Net(nn.Module): + def __init__(self, dropout=0.0): + super(Net, self).__init__() + self.dropout = dropout + self.block = nn.Sequential( + nn.Conv2d(1, 32, 2), + nn.MaxPool2d(2), + nn.ReLU(), + nn.Conv2d(32, 64, 2), + nn.MaxPool2d(2), + nn.ReLU(), + nn.Conv2d(64, 128, 2), + nn.ReLU(), + ) + self.fc1 = nn.Linear(128 * 5**2, 200) + self.fc2 = nn.Linear(200, 10) + self.relu = nn.ReLU() + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x): + x = self.dropout(x) + out = self.block(x) + out = out.view(-1, 128 * 5**2) + out = self.dropout(out) + out = self.relu(self.fc1(out)) + out = self.dropout(out) + out = self.fc2(out) + return F.log_softmax(out, 1) + + +def inference(network, test_loader): + network.eval() + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + output = network(data) + pred = output.data.max(1, keepdim=True)[1] + correct += pred.eq(target.data.view_as(pred)).sum() + accuracy = float(correct / len(test_loader.dataset)) + return accuracy + + +def train_model(model, optimizer, data_loader, entity, round_number, log=False): + # Helper function to train the model + train_loss = 0 + log_interval = 20 + model.train() + for batch_idx, (X, y) in enumerate(data_loader): + 
optimizer.zero_grad() + + output = model(X) + loss = F.nll_loss(output, y) + loss.backward() + + optimizer.step() + + train_loss += loss.item() * len(X) + if batch_idx % log_interval == 0 and log: + print("{:<20} Train Epoch: {:<3} [{:<3}/{:<4} ({:<.0f}%)] Loss: {:<.6f}".format( + entity, + round_number, + batch_idx * len(X), + len(data_loader.dataset), + 100.0 * batch_idx / len(data_loader), + loss.item(), + ) + ) + train_loss /= len(data_loader.dataset) + return train_loss + +# %% ../../MNIST_Watermarking.ipynb 11 +from openfl.experimental.workflow.interface import FLSpec +from openfl.experimental.workflow.placement import aggregator, collaborator + +def FedAvg(agg_model, models, weights=None): + state_dicts = [model.state_dict() for model in models] + state_dict = agg_model.state_dict() + for key in models[0].state_dict(): + state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in state_dicts], + axis=0, + weights=weights)) + + agg_model.load_state_dict(state_dict) + return agg_model + +# %% ../../MNIST_Watermarking.ipynb 13 +class FederatedFlow_MNIST_Watermarking(FLSpec): + """ + This Flow demonstrates Watermarking on a Deep Learning Model in Federated Learning + Ref: WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) + """ + + def __init__( + self, + model=None, + optimizer=None, + watermark_pretrain_optimizer=None, + watermark_retrain_optimizer=None, + round_number=0, + n_rounds=3, + **kwargs, + ): + super().__init__(**kwargs) + + if model is not None: + self.model = model + self.optimizer = optimizer + self.watermark_pretrain_optimizer = watermark_pretrain_optimizer + self.watermark_retrain_optimizer = watermark_retrain_optimizer + else: + self.model = Net() + self.optimizer = optim.SGD( + self.model.parameters(), lr=learning_rate, momentum=momentum + ) + self.watermark_pretrain_optimizer = optim.SGD( + self.model.parameters(), + lr=watermark_pretrain_learning_rate, + momentum=watermark_pretrain_momentum, + 
weight_decay=watermark_pretrain_weight_decay, + ) + self.watermark_retrain_optimizer = optim.SGD( + self.model.parameters(), lr=watermark_retrain_learning_rate + ) + self.round_number = round_number + self.n_rounds = n_rounds + self.watermark_pretraining_completed = False + + @aggregator + def start(self): + """ + This is the start of the Flow. + """ + print(": Start of flow ... ") + self.collaborators = self.runtime.collaborators + + self.next(self.watermark_pretrain) + + @aggregator + def watermark_pretrain(self): + """ + Pre-Train the Model before starting Federated Learning. + """ + if not self.watermark_pretraining_completed: + + print(": Performing Watermark Pre-training") + + for i in range(self.pretrain_epochs): + + watermark_pretrain_loss = train_model( + self.model, + self.watermark_pretrain_optimizer, + self.watermark_data_loader, + ":", + i, + log=False, + ) + watermark_pretrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + + print(f": Watermark Pretraining: Round: {i:<3}" + + f" Loss: {watermark_pretrain_loss:<.6f}" + + f" Acc: {watermark_pretrain_validation_score:<.6f}") + + self.watermark_pretraining_completed = True + + self.next( + self.aggregated_model_validation, + foreach="collaborators", + ) + + @collaborator + def aggregated_model_validation(self): + """ + Perform Aggregated Model validation on Collaborators. + """ + self.agg_validation_score = inference(self.model, self.test_loader) + print(f"" + + f" Aggregated Model validation score = {self.agg_validation_score}" + ) + + self.next(self.train) + + @collaborator + def train(self): + """ + Train model on Local collab dataset. + """ + print(": Performing Model Training on Local dataset ... 
") + + self.optimizer = optim.SGD( + self.model.parameters(), lr=learning_rate, momentum=momentum + ) + + self.loss = train_model( + self.model, + self.optimizer, + self.train_loader, + f"", + self.round_number, + log=True, + ) + + self.next(self.local_model_validation) + + @collaborator + def local_model_validation(self): + """ + Validate locally trained model. + """ + self.local_validation_score = inference(self.model, self.test_loader) + print( + f" Local model validation score = {self.local_validation_score}" + ) + self.next(self.join) + + @aggregator + def join(self, inputs): + """ + Model aggregation step. + """ + self.average_loss = sum(input.loss for input in inputs) / len(inputs) + self.aggregated_model_accuracy = sum( + input.agg_validation_score for input in inputs + ) / len(inputs) + self.local_model_accuracy = sum( + input.local_validation_score for input in inputs + ) / len(inputs) + + print(": Joining models from collaborators...") + + print( + f" Aggregated model validation score = {self.aggregated_model_accuracy}" + ) + print(f" Average training loss = {self.average_loss}") + print(f" Average local model validation values = {self.local_model_accuracy}") + + self.model = FedAvg(self.model, [input.model for input in inputs]) + + self.next(self.watermark_retrain) + + @aggregator + def watermark_retrain(self): + """ + Retrain the aggregated model. + """ + print(": Performing Watermark Retraining ... 
") + self.watermark_retrain_optimizer = optim.SGD( + self.model.parameters(), lr=watermark_retrain_learning_rate + ) + + retrain_round = 0 + + # Perform re-training until (accuracy >= acc_threshold) or + # (retrain_round > number of retrain_epochs) + self.watermark_retrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + while ( + self.watermark_retrain_validation_score < self.watermark_acc_threshold + ) and (retrain_round < self.retrain_epochs): + self.watermark_retrain_train_loss = train_model( + self.model, + self.watermark_retrain_optimizer, + self.watermark_data_loader, + "", + retrain_round, + log=False, + ) + self.watermark_retrain_validation_score = inference( + self.model, self.watermark_data_loader + ) + + print(f": Watermark Retraining: Train Epoch: {self.round_number:<3}" + + f" Retrain Round: {retrain_round:<3}" + + f" Loss: {self.watermark_retrain_train_loss:<.6f}," + + f" Acc: {self.watermark_retrain_validation_score:<.6f}") + retrain_round += 1 + + self.next(self.internal_loop) + + @aggregator + def internal_loop(self): + """ + Internal loop to continue the Federated Learning process. + """ + if self.round_number == self.n_rounds - 1: + print(f"\nCompleted training for all {self.n_rounds} round(s)") + self.next(self.end) + else: + self.round_number += 1 + print(f"\nCompleted round: {self.round_number}") + self.next(self.aggregated_model_validation, foreach='collaborators') + + @aggregator + def end(self): + """ + This is the last step in the Flow. 
+ """ + print("This is the end of the flow") + +# %% ../../MNIST_Watermarking.ipynb 15 +from openfl.experimental.workflow.runtime import FederatedRuntime + +director_info = { + 'director_node_fqdn':'localhost', + 'director_port':50050, +} + +authorized_collaborators = ['Bangalore', 'Chandler'] + +federated_runtime = FederatedRuntime( + collaborators=authorized_collaborators, + director=director_info, + notebook_path='./MNIST_Watermarking.ipynb', +) + +# %% ../../MNIST_Watermarking.ipynb 19 +# Set random seed +random_seed = 42 +torch.manual_seed(random_seed) +np.random.seed(random_seed) +torch.backends.cudnn.enabled = False + +# MNIST parameters +learning_rate = 5e-2 +momentum = 5e-1 +log_interval = 20 + +# Watermarking parameters +watermark_pretrain_learning_rate = 1e-1 +watermark_pretrain_momentum = 5e-1 +watermark_pretrain_weight_decay = 5e-05 +watermark_retrain_learning_rate = 5e-3 + +model = Net() +optimizer = optim.SGD( + model.parameters(), lr=learning_rate, momentum=momentum +) +watermark_pretrain_optimizer = optim.SGD( + model.parameters(), + lr=watermark_pretrain_learning_rate, + momentum=watermark_pretrain_momentum, + weight_decay=watermark_pretrain_weight_decay, +) +watermark_retrain_optimizer = optim.SGD( + model.parameters(), lr=watermark_retrain_learning_rate +) + +flflow = FederatedFlow_MNIST_Watermarking( + model, + optimizer, + watermark_pretrain_optimizer, + watermark_retrain_optimizer, + checkpoint=True, +) +flflow.runtime = federated_runtime +# flflow.run() diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py new file mode 100644 index 0000000000..901e403dfa --- /dev/null +++ b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py @@ -0,0 +1,95 @@ +# Copyright (C) 2020-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import shutil +import filecmp +from 
pathlib import Path +from openfl.experimental.workflow.runtime import FederatedRuntime +from openfl.experimental.workflow.notebooktools import NotebookTools + +# Define paths +NOTEBOOK_PATH = "testcase_export_federated/MNIST_Watermarking.ipynb" +ACTUAL_DIR = "testcase_export_federated/test_artifacts/actual" +EXPECTED_DIR = "testcase_export_federated/test_artifacts/expected" + +# Setup for FederatedRuntime +director_info = { + 'director_node_fqdn': 'localhost', + 'director_port': 50050, +} + +authorized_collaborators = ['Bangalore', 'Chandler'] + +# Creating an instance of FederatedRuntime +federated_runtime = FederatedRuntime( + collaborators=authorized_collaborators, + director=director_info, + notebook_path=NOTEBOOK_PATH, + tls=False # Actual testcase tls is set to false +) + +def setup_workspace(): + """Setup function to create the actual workspace for testing.""" + # Ensure the actual directory is empty + if Path(ACTUAL_DIR).exists(): + shutil.rmtree(ACTUAL_DIR) + Path(ACTUAL_DIR).mkdir(parents=True, exist_ok=True) + + # Use the FederatedRuntime instance to get the parameters + notebook_path = federated_runtime.notebook_path + director_fqdn = federated_runtime.director["director_node_fqdn"] + tls = federated_runtime.tls + + # Generate workspace using NotebookTools + NotebookTools.export_federated( + notebook_path=notebook_path, + output_workspace=ACTUAL_DIR, + director_fqdn=director_fqdn, + tls=tls + ) + +def compare_files(file1, file2): + """Compare the content of two files, ignoring comment lines (lines starting with '#').""" + with open(file1, "r") as f1, open(file2, "r") as f2: + lines1 = f1.readlines() + lines2 = f2.readlines() + + # Remove comment lines (lines starting with '#') + lines1 = [line for line in lines1 if not line.startswith("#")] + lines2 = [line for line in lines2 if not line.startswith("#")] + + return lines1 == lines2 + +def compare_directories(dir1, dir2): + """Compare two directories recursively, including file content.""" + comparison = 
filecmp.dircmp(dir1, dir2) + + # Check for differences in file names or structure + if comparison.left_only or comparison.right_only: + return False + + # Compare subdirectories + for subdir in comparison.common_dirs: + if not compare_directories(Path(dir1) / subdir, Path(dir2) / subdir): + return False + + # Compare file content for all common files + for file in comparison.common_files: + file1 = Path(dir1) / file + file2 = Path(dir2) / file + if not compare_files(file1, file2): + return False + + return True + +def test_export_federated_functionality(): + """Test that the workspace generated by NotebookTools matches the Expected Artifacts. + + This function compares the contents of the actual directory generated by + NotebookTools with the expected directory. + """ + # Compare the expected and actual directories + assert compare_directories(EXPECTED_DIR, ACTUAL_DIR), ( + "The workspace generated by NotebookTools does not match the expected. " + "Check the differences in the test_artifacts/expected and test_artifacts/actual folders." + ) \ No newline at end of file From 43faf7873e9c6af7dcd11b69cd37da94106c4be0 Mon Sep 17 00:00:00 2001 From: refai06 Date: Wed, 19 Feb 2025 10:21:40 +0530 Subject: [PATCH 5/7] Code Enhancement Signed-off-by: refai06 --- .../workflow/notebooktools/code_analyzer.py | 36 +++--- .../workflow/notebooktools/notebook_tools.py | 114 ++++++++++++------ 2 files changed, 95 insertions(+), 55 deletions(-) diff --git a/openfl/experimental/workflow/notebooktools/code_analyzer.py b/openfl/experimental/workflow/notebooktools/code_analyzer.py index 2a065c52fc..d0677cef71 100644 --- a/openfl/experimental/workflow/notebooktools/code_analyzer.py +++ b/openfl/experimental/workflow/notebooktools/code_analyzer.py @@ -17,7 +17,8 @@ class CodeAnalyzer: - """Code analysis and transformation functionality for NotebookTools + """Analyzes and process Jupyter Notebooks. 
+ Provides code extraction and transformation functionality to NotebookTools Attributes: script_path: Absolute path to python script. @@ -28,18 +29,14 @@ def __init__(self, notebook_path: Path, output_path: Path) -> None: """Initialize CodeAnalzer and process the script from notebook Args: - notebook_path (Path): The path to the Jupyter notebook that needs to be converted. + notebook_path (Path): Path to Jupyter notebook to be converted. output_path (Path): The directory where the converted Python script will be saved. """ logger.info("Converting jupter notebook to python script...") # Extract the export filename from the notebook export_filename = self.__get_exp_name(notebook_path) - if export_filename is None: - raise NameError( - "Please include `#| default_exp ` in " - "the first cell of the notebook." - ) + # Convert the notebook to a Python script and set the script path self.script_path = Path( self.__convert_to_python( @@ -51,15 +48,14 @@ def __init__(self, notebook_path: Path, output_path: Path) -> None: # Generated python script name self.script_name = self.script_path.name.split(".")[0].strip() - # Comment out flow.run() to prevent the flow from starting execution - # automatically when the script is imported. - self.__comment_flow_execution() - - # Change the runtime backend from 'ray' to 'single_process' - self.__change_runtime() + # Transform the script + self._transform_script() def __get_exp_name(self, notebook_path: Path) -> str: - """Fetch the experiment name from the Jupyter notebook. + """Extract experiment name from Jupyter notebook + Looks for '#| default_exp ' pattern in code cells + and extracts the experiment name. The name must be a valid Python identifier. + Args: notebook_path (str): Path to Jupyter notebook. 
""" @@ -94,6 +90,16 @@ def __convert_to_python(self, notebook_path: Path, output_path: Path, export_fil return Path(output_path).joinpath(export_filename).resolve() + def _transform_script(self) -> None: + """ + Transform the script by commenting out flow.run() and changing the runtime backend. + """ + # Comment out flow.run() to prevent the flow from starting execution + self.__comment_flow_execution() + + # Change the runtime backend from 'ray' to 'single_process' + self.__change_runtime() + def __comment_flow_execution(self) -> None: """Comment out lines containing '.run()' in the specified Python script""" with open(self.script_path, "r") as f: @@ -380,7 +386,7 @@ def get_flow_runtime_info(self, flow_class_name: str) -> Tuple[object, str]: flow_class_name (str): The name of the federated flow class to retrieve. Returns: - tuple: A tuple containing the runtime instance and the flow class name. + tuple: A tuple containing the runtime instance and the flow name. """ if not hasattr(self, "exported_script_module"): self.__import_exported_script() diff --git a/openfl/experimental/workflow/notebooktools/notebook_tools.py b/openfl/experimental/workflow/notebooktools/notebook_tools.py index f75cb2a6cf..ab9167105d 100644 --- a/openfl/experimental/workflow/notebooktools/notebook_tools.py +++ b/openfl/experimental/workflow/notebooktools/notebook_tools.py @@ -8,7 +8,7 @@ from logging import getLogger from pathlib import Path from shutil import copytree -from typing import Tuple +from typing import Any, Dict, Tuple from openfl.experimental.workflow.federated.plan import Plan from openfl.experimental.workflow.interface.cli.cli_helper import print_tree @@ -25,16 +25,15 @@ class NotebookTools: notebook_path: Absolute path of jupyter notebook. template_workspace_path: Path to template workspace provided with OpenFL. - output_workspace_path: Output directory for new generated workspace - (default="/tmp"). + output_workspace_path: Output directory for new generated workspace. 
+ code_analyzer: An instance of the CodeAnalyzer class for analyzing notebook code. """ def __init__(self, notebook_path: str, output_workspace: str) -> None: """Initialize a NotebookTools object. Args: - notebook_path (str): The path to the Jupyter notebook that needs to be converted. - output_workspace (str): The directory where the converted workspace will be saved - workspace + notebook_path (str): Path to Jupyter notebook to be converted. + output_workspace (str): Target directory for generated workspace """ self.notebook_path = Path(notebook_path).resolve() # Check if the Jupyter notebook exists @@ -125,18 +124,27 @@ def _generate_requirements(self) -> None: """Extracts pip libraries from exported python script and append in workspace/requirements.txt """ - requirements, line_numbers, data = self.code_analyzer.get_requirements() + try: + # Get requirements and related data from the code analyzer + requirements, line_numbers, data = self.code_analyzer.get_requirements() - requirements_filepath = str( - self.output_workspace_path.joinpath("requirements.txt").resolve() - ) + # Define the path for the requirements.txt file + requirements_filepath = str( + self.output_workspace_path.joinpath("requirements.txt").resolve() + ) + + # Write libraries found in requirements.txt + with open(requirements_filepath, "a") as f: + f.writelines(requirements) + + # Delete pip requirements from the python script to ensure it can be imported + self.code_analyzer.remove_lines(data, line_numbers) - # Write libraries found in requirements.txt - with open(requirements_filepath, "a") as f: - f.writelines(requirements) + logger.info(f"Successfully generated {requirements_filepath}") - # Delete pip requirements from the python script to ensure it can be imported - self.code_analyzer.remove_lines(data, line_numbers) + except Exception as e: + # Log error message with exception details + logger.error(f"Failed to generate requirements: {e}") def _clean_generated_workspace(self) -> None: 
""" @@ -169,56 +177,67 @@ def _generate_plan_yaml(self, director_fqdn: str = None, tls: bool = False) -> N flow_config = self.code_analyzer.fetch_flow_configuration(flow_details) # Determine the path for the plan.yaml file - plan = self.output_workspace_path.joinpath("plan", "plan.yaml").resolve() + plan_path = self.output_workspace_path.joinpath("plan", "plan.yaml").resolve() - # Initialize the YAML data - data = self._initialize_plan_yaml(plan) + # Build the complete plan configuration + data_config = self._build_plan_config(flow_config, director_fqdn, tls, plan_path) + + # Write the updated plan configuraiton to the plan.yaml file + Plan.dump(plan_path, data_config) + + def _build_plan_config( + self, flow_config: Dict[str, Any], director_fqdn: str, tls: bool, plan_path: Path + ) -> Dict[str, Any]: + """ + Build plan configuration with validation. + + Args: + flow_config: Flow configuration dictionary + director_fqdn: Director's FQDN + tls: TLS setting + plan_path: Path to plan.yaml - # Update the plan_configuration with the analyzed flow configuration - data["federated_flow"].update(flow_config["federated_flow"]) + Returns: + Dict[str, Any]: Complete plan configuration + """ + data_config = self._initialize_plan_yaml(plan_path) + data_config["federated_flow"].update(flow_config["federated_flow"]) - # Updating the aggregator address with director's hostname and tls settings in plan.yaml if director_fqdn: - network_settings = Plan.parse(plan).config["network"] - data["network"] = network_settings - data["network"]["settings"]["agg_addr"] = director_fqdn - data["network"]["settings"]["tls"] = tls + network_settings = Plan.parse(plan_path).config["network"] + data_config["network"] = network_settings + data_config["network"]["settings"]["agg_addr"] = director_fqdn + data_config["network"]["settings"]["tls"] = tls - # Write the updated plan configuraiton to the plan.yaml file - Plan.dump(plan, data) + return data_config def _generate_data_yaml(self) -> None: 
"""Generate data.yaml""" - # Get flow class_name - if not hasattr(self, "flow_class_name"): - flow_details = self._extract_flow_details() - self.flow_class_name = flow_details["flow_class_name"] - - # Get runtime information using CodeAnalyzer - runtime, flow_instance_name = self.code_analyzer.get_flow_runtime_info(self.flow_class_name) + # Get runtime information + runtime, flow_instance_name = self._get_runtime_info() # Determine the path for the data.yaml data_yaml = self.output_workspace_path.joinpath("plan", "data.yaml").resolve() # Initialize the YAML data - data = self._initialize_data_yaml(data_yaml) + data_config = self._initialize_data_yaml(data_yaml) # Initiaize runtime name runtime_name = "runtime_local" # Process aggregator information using CodeAnalyzer runtime_created = self.code_analyzer.process_aggregator( - runtime, data, flow_instance_name, runtime_name + runtime, data_config, flow_instance_name, runtime_name ) # Process collaborator information using CodeAnalyzer self.code_analyzer.process_collaborators( - runtime, data, flow_instance_name, runtime_created, runtime_name + runtime, data_config, flow_instance_name, runtime_created, runtime_name ) # Write updated data configuration to the data.yaml file - Plan.dump(data_yaml, data) + Plan.dump(data_yaml, data_config) def _extract_flow_details(self) -> str: """Extract the flow class details""" @@ -228,7 +247,22 @@ def _extract_flow_details(self) -> str: raise ValueError("Failed to extract flow class details") return flow_details - def _initialize_plan_yaml(self, plan_yaml) -> dict: + def _get_runtime_info(self) -> Tuple[object, str]: + """ + Get runtime information for the flow class. + + Returns: + Tuple[object, str]: A tuple containing the runtime and flow instance name. 
+ """ + if not hasattr(self, "flow_class_name"): + flow_details = self._extract_flow_details() + self.flow_class_name = flow_details["flow_class_name"] + + # Get runtime information using CodeAnalyzer + runtime, flow_instance_name = self.code_analyzer.get_flow_runtime_info(self.flow_class_name) + return runtime, flow_instance_name + + def _initialize_plan_yaml(self, plan_yaml: Path) -> dict: """Load or initialize the plan YAML data. Args: plan_yaml (Path): The path to the plan.yaml file. @@ -242,7 +276,7 @@ def _initialize_plan_yaml(self, plan_yaml) -> dict: data["federated_flow"] = {"settings": {}, "template": ""} return data - def _initialize_data_yaml(self, data_yaml) -> dict: + def _initialize_data_yaml(self, data_yaml: Path) -> dict: """Load or initialize the YAML data. Args: data_yaml (Path): The path to the data.yaml file. From 2d4093bee369c293b2d51808423ad2f669c7022e Mon Sep 17 00:00:00 2001 From: refai06 Date: Thu, 20 Feb 2025 12:40:56 +0530 Subject: [PATCH 6/7] Docstring & loggers update Signed-off-by: refai06 --- .../workflow/notebooktools/code_analyzer.py | 14 +++++++------- .../workflow/notebooktools/notebook_tools.py | 14 +++++++++----- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/openfl/experimental/workflow/notebooktools/code_analyzer.py b/openfl/experimental/workflow/notebooktools/code_analyzer.py index d0677cef71..d1badbfe4e 100644 --- a/openfl/experimental/workflow/notebooktools/code_analyzer.py +++ b/openfl/experimental/workflow/notebooktools/code_analyzer.py @@ -49,7 +49,7 @@ def __init__(self, notebook_path: Path, output_path: Path) -> None: self.script_name = self.script_path.name.split(".")[0].strip() # Transform the script - self._transform_script() + self.__transform_script() def __get_exp_name(self, notebook_path: Path) -> str: """Extract experiment name from Jupyter notebook @@ -90,7 +90,7 @@ def __convert_to_python(self, notebook_path: Path, output_path: Path, export_fil return 
Path(output_path).joinpath(export_filename).resolve() - def _transform_script(self) -> None: + def __transform_script(self) -> None: """ Transform the script by commenting out flow.run() and changing the runtime backend. """ @@ -380,13 +380,13 @@ def update_dictionary(args: dict, dtype: str = "args") -> None: return flow_config - def get_flow_runtime_info(self, flow_class_name: str) -> Tuple[object, str]: - """Get federated flow class and runtime information. + def fetch_flow_runtime_info(self, flow_class_name: str) -> Tuple[object, str]: + """Fetch the federated flow class, its runtime information and flow instance name Args: - flow_class_name (str): The name of the federated flow class to retrieve. + flow_class_name (str): The name of the federated flow class. Returns: - tuple: A tuple containing the runtime instance and the flow name. + tuple: A tuple containing the runtime instance and the flow instance name. """ if not hasattr(self, "exported_script_module"): self.__import_exported_script() @@ -401,7 +401,7 @@ def _find_flow_instance_runtime(self, federated_flow_class) -> Tuple[str, object federated_flow_class: The class object of the federated flow. Returns: - tuple: A tuple containing the name of the flow instance and the runtime instance. + tuple: A tuple containing the name of the flow instance and the runtime. 
""" for t in self.available_modules_in_exported_script: tempstring = t diff --git a/openfl/experimental/workflow/notebooktools/notebook_tools.py b/openfl/experimental/workflow/notebooktools/notebook_tools.py index ab9167105d..8fde19e3b9 100644 --- a/openfl/experimental/workflow/notebooktools/notebook_tools.py +++ b/openfl/experimental/workflow/notebooktools/notebook_tools.py @@ -3,6 +3,7 @@ """Notebook Tools module.""" +import logging import shutil from importlib import import_module from logging import getLogger @@ -14,6 +15,7 @@ from openfl.experimental.workflow.interface.cli.cli_helper import print_tree from openfl.experimental.workflow.notebooktools.code_analyzer import CodeAnalyzer +logging.basicConfig(level=logging.INFO, format="%(message)s") logger = getLogger(__name__) @@ -215,7 +217,7 @@ def _generate_data_yaml(self) -> None: """Generate data.yaml""" # Get runtime information - runtime, flow_instance_name = self._get_runtime_info() + runtime, flow_instance_name = self._get_flow_runtime() # Determine the path for the data.yaml data_yaml = self.output_workspace_path.joinpath("plan", "data.yaml").resolve() @@ -247,9 +249,9 @@ def _extract_flow_details(self) -> str: raise ValueError("Failed to extract flow class details") return flow_details - def _get_runtime_info(self) -> Tuple[object, str]: + def _get_flow_runtime(self) -> Tuple[object, str]: """ - Get runtime information for the flow class. + Get the runtime and flow instance name using CodeAnalyzer Returns: Tuple[object, str]: A tuple containing the runtime and flow instance name. 
@@ -258,8 +260,10 @@ def _get_runtime_info(self) -> Tuple[object, str]: flow_details = self._extract_flow_details() self.flow_class_name = flow_details["flow_class_name"] - # Get runtime information using CodeAnalyzer - runtime, flow_instance_name = self.code_analyzer.get_flow_runtime_info(self.flow_class_name) + # Get runtime information and flow instance name using CodeAnalyzer + runtime, flow_instance_name = self.code_analyzer.fetch_flow_runtime_info( + self.flow_class_name + ) return runtime, flow_instance_name def _initialize_plan_yaml(self, plan_yaml: Path) -> dict: From 208c8f9956d567f367417730d3bf52ca7d48289c Mon Sep 17 00:00:00 2001 From: refai06 Date: Thu, 20 Feb 2025 17:54:47 +0530 Subject: [PATCH 7/7] Added NotebookTools module Testcase Signed-off-by: refai06 --- .../301_MNIST_Watermarking.ipynb | 924 ------------------ .../test_artifacts/actual/plan/data.yaml | 51 - .../test_artifacts/actual/plan/plan.yaml | 20 - .../test_artifacts/actual/requirements.txt | 6 - .../test_artifacts/actual/src/experiment.py | 664 ------------- .../test_artifacts/expected/plan/cols.yaml | 5 - .../test_artifacts/expected/plan/data.yaml | 51 - .../test_artifacts/expected/src/experiment.py | 664 ------------- .../testcase_export/test_script.py | 73 -- .../test_artifacts/actual/.workspace | 2 - .../test_artifacts/actual/plan/defaults | 2 - .../test_artifacts/actual/src/__init__.py | 2 - .../test_artifacts/actual/src/experiment.py | 380 ------- .../test_artifacts/expected/.workspace | 2 - .../test_artifacts/expected/plan/defaults | 2 - .../test_artifacts/expected/plan/plan.yaml | 25 - .../test_artifacts/expected/requirements.txt | 7 - .../test_artifacts/expected/src/__init__.py | 2 - .../testcase_export_federated/test_script.py | 95 -- .../workflow/NotebookTools/README.md | 50 + .../testcase_export/test_101_MNIST.ipynb | 344 +++++++ .../test_artifacts/expected}/.workspace | 0 .../test_artifacts/expected}/plan/cols.yaml | 0 .../test_artifacts/expected/plan/data.yaml | 8 + 
.../test_artifacts/expected}/plan/defaults | 0 .../test_artifacts/expected/plan/plan.yaml | 10 +- .../test_artifacts/expected/requirements.txt | 4 +- .../test_artifacts/expected}/src/__init__.py | 0 .../test_artifacts/expected/src/experiment.py | 228 +++++ .../testcase_export/test_script.py | 112 +++ .../test_MNIST_Watermarking.ipynb} | 86 +- .../test_artifacts/expected/.workspace | 0 .../test_artifacts/expected/plan/defaults | 0 .../test_artifacts/expected}/plan/plan.yaml | 0 .../test_artifacts/expected}/requirements.txt | 0 .../test_artifacts/expected/src/__init__.py | 0 .../test_artifacts/expected/src/experiment.py | 0 .../testcase_export_federated/test_script.py | 134 +++ 38 files changed, 883 insertions(+), 3070 deletions(-) delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/301_MNIST_Watermarking.ipynb delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/data.yaml delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/plan.yaml delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/requirements.txt delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/experiment.py delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/cols.yaml delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export/test_script.py delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/.workspace delete mode 100644 
tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/defaults delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/__init__.py delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/experiment.py delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/.workspace delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/defaults delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/plan.yaml delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/requirements.txt delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/__init__.py delete mode 100644 tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py create mode 100644 tests/openfl/experimental/workflow/NotebookTools/README.md create mode 100644 tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_101_MNIST.ipynb rename tests/{github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual => openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected}/.workspace (100%) rename tests/{github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual => openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected}/plan/cols.yaml (100%) create mode 100644 tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml rename tests/{github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual => 
openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected}/plan/defaults (100%) rename tests/{github => openfl}/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/plan.yaml (55%) rename tests/{github => openfl}/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/requirements.txt (53%) rename tests/{github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual => openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected}/src/__init__.py (100%) create mode 100644 tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py create mode 100644 tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_script.py rename tests/{github/experimental/workflow/NotebookTools/testcase_export_federated/MNIST_Watermarking.ipynb => openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_MNIST_Watermarking.ipynb} (86%) rename tests/{github/experimental/workflow/NotebookTools/testcase_export => openfl/experimental/workflow/NotebookTools/testcase_export_federated}/test_artifacts/expected/.workspace (100%) rename tests/{github/experimental/workflow/NotebookTools/testcase_export => openfl/experimental/workflow/NotebookTools/testcase_export_federated}/test_artifacts/expected/plan/defaults (100%) rename tests/{github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual => openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected}/plan/plan.yaml (100%) rename tests/{github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual => openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected}/requirements.txt (100%) rename tests/{github/experimental/workflow/NotebookTools/testcase_export => 
openfl/experimental/workflow/NotebookTools/testcase_export_federated}/test_artifacts/expected/src/__init__.py (100%) rename tests/{github => openfl}/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/experiment.py (100%) create mode 100644 tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/301_MNIST_Watermarking.ipynb b/tests/github/experimental/workflow/NotebookTools/testcase_export/301_MNIST_Watermarking.ipynb deleted file mode 100644 index dcd327ed1a..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export/301_MNIST_Watermarking.ipynb +++ /dev/null @@ -1,924 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "dc13070c", - "metadata": {}, - "source": [ - "# Workflow Interface 301: Watermarking\n", - "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/intel/openfl/blob/develop/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "8f28c451", - "metadata": {}, - "source": [ - "This OpenFL Workflow Interface tutorial demonstrates Watermarking of DL Model in Federated Learning. Watermarking enables the Model owner to assert ownership rights and detect stolen model instances. \n", - "\n", - "In this tutorial we use Backdooring to embed Watermark on a DL model trained on MNIST Dataset. This involves training the DL model with both the actual training data and the backdoor (a.k.a Watermark dataset). Watermark dataset is designed by the Model owner and consists of mislabelled input and output data pairs. Watermarked model performs normally on the Target dataset but returns incorrect labels on the Watermark dataset. 
Watermark dataset needs to be hidden from the Collaborators and Watermarking embedding needs to be performed at a trusted entity (Aggregator in this case)\n", - "\n", - "This workflow demonstrates: \n", - "- Flexibility to define the Watermark embedding steps as Aggregator processing steps without any involvement of Collaborators\n", - "- Ability to define Watermark dataset as a private attribute of Aggregator entity\n", - "- Flexibility to select a subset of collaborators on which Model Training is performed every training round\n", - "- Visualize the Workflow as a Graph\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a4394089", - "metadata": {}, - "source": [ - "# Getting Started" - ] - }, - { - "cell_type": "markdown", - "id": "ff167e44", - "metadata": {}, - "source": [ - "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. \n", - "\n", - "In the following cell, `#| default_exp experiment `indicates that the exported file will be named 'experiment'. This name can be modified based on user's requirement & preferences" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e9a73bd", - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp experiment" - ] - }, - { - "cell_type": "markdown", - "id": "e69cdbeb", - "metadata": {}, - "source": [ - "Once we have specified the name of the module, subsequent cells of the notebook need to be *appended* by the `#| export` directive as shown below. 
User should ensure that *all* the notebook functionality required in the Federated Learning experiment is included in this directive" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "857f9995", - "metadata": {}, - "source": [ - "First we start by installing the necessary dependencies for the workflow interface" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f7475cba", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "# !pip install git+https://github.com/securefederatedai/openfl.git\n", - "!pip install -r workflow_interface_requirements.txt\n", - "!pip install torch\n", - "!pip install torchvision\n", - "!pip install matplotlib\n", - "!pip install git+https://github.com/pyviz-topics/imagen.git@master\n", - "!pip install holoviews==1.15.4\n", - "\n", - "\n", - "# Uncomment this if running in Google Colab\n", - "#!pip install -r https://raw.githubusercontent.com/intel/openfl/develop/openfl-tutorials/experimental/workflow/workflow_interface_requirements.txt\n", - "#import os\n", - "#os.environ[\"USERNAME\"] = \"colab\"" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "7bd566df", - "metadata": {}, - "source": [ - "We begin with the quintessential example of a pytorch CNN model trained on the MNIST dataset. 
Let's start by defining our dataloaders, model, optimizer, and some helper functions like we would for any other deep learning experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9bd8ac2d", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim\n", - "import torch\n", - "import torchvision\n", - "import numpy as np\n", - "import random\n", - "import pathlib\n", - "import os\n", - "import matplotlib\n", - "import matplotlib.pyplot as plt\n", - "import PIL.Image as Image\n", - "import imagen as ig\n", - "import numbergen as ng\n", - "\n", - "random_seed = 1\n", - "torch.backends.cudnn.enabled = False\n", - "torch.manual_seed(random_seed)\n", - "\n", - "# MNIST Train and Test datasets\n", - "mnist_train = torchvision.datasets.MNIST(\n", - " \"./files/\",\n", - " train=True,\n", - " download=True,\n", - " transform=torchvision.transforms.Compose(\n", - " [\n", - " torchvision.transforms.ToTensor(),\n", - " torchvision.transforms.Normalize((0.1307,), (0.3081,)),\n", - " ]\n", - " ),\n", - ")\n", - "\n", - "mnist_test = torchvision.datasets.MNIST(\n", - " \"./files/\",\n", - " train=False,\n", - " download=True,\n", - " transform=torchvision.transforms.Compose(\n", - " [\n", - " torchvision.transforms.ToTensor(),\n", - " torchvision.transforms.Normalize((0.1307,), (0.3081,)),\n", - " ]\n", - " ),\n", - ")\n", - "\n", - "\n", - "class Net(nn.Module):\n", - " def __init__(self, dropout=0.0):\n", - " super(Net, self).__init__()\n", - " self.dropout = dropout\n", - " self.block = nn.Sequential(\n", - " nn.Conv2d(1, 32, 2),\n", - " nn.MaxPool2d(2),\n", - " nn.ReLU(),\n", - " nn.Conv2d(32, 64, 2),\n", - " nn.MaxPool2d(2),\n", - " nn.ReLU(),\n", - " nn.Conv2d(64, 128, 2),\n", - " nn.ReLU(),\n", - " )\n", - " self.fc1 = nn.Linear(128 * 5**2, 200)\n", - " self.fc2 = nn.Linear(200, 10)\n", - " self.relu = nn.ReLU()\n", - " 
self.dropout = nn.Dropout(p=dropout)\n", - "\n", - " def forward(self, x):\n", - " x = self.dropout(x)\n", - " out = self.block(x)\n", - " out = out.view(-1, 128 * 5**2)\n", - " out = self.dropout(out)\n", - " out = self.relu(self.fc1(out))\n", - " out = self.dropout(out)\n", - " out = self.fc2(out)\n", - " return F.log_softmax(out, 1)\n", - "\n", - "\n", - "def inference(network, test_loader):\n", - " network.eval()\n", - " correct = 0\n", - " with torch.no_grad():\n", - " for data, target in test_loader:\n", - " output = network(data)\n", - " pred = output.data.max(1, keepdim=True)[1]\n", - " correct += pred.eq(target.data.view_as(pred)).sum()\n", - " accuracy = float(correct / len(test_loader.dataset))\n", - " return accuracy\n", - "\n", - "\n", - "def train_model(model, optimizer, data_loader, entity, round_number, log=False):\n", - " # Helper function to train the model\n", - " train_loss = 0\n", - " log_interval = 20\n", - " model.train()\n", - " for batch_idx, (X, y) in enumerate(data_loader):\n", - " optimizer.zero_grad()\n", - "\n", - " output = model(X)\n", - " loss = F.nll_loss(output, y)\n", - " loss.backward()\n", - "\n", - " optimizer.step()\n", - "\n", - " train_loss += loss.item() * len(X)\n", - " if batch_idx % log_interval == 0 and log:\n", - " print(\"{:<20} Train Epoch: {:<3} [{:<3}/{:<4} ({:<.0f}%)] Loss: {:<.6f}\".format(\n", - " entity,\n", - " round_number,\n", - " batch_idx * len(X),\n", - " len(data_loader.dataset),\n", - " 100.0 * batch_idx / len(data_loader),\n", - " loss.item(),\n", - " )\n", - " )\n", - " train_loss /= len(data_loader.dataset)\n", - " return train_loss" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f0c55175", - "metadata": {}, - "source": [ - "Watermark dataset consists of mislabelled (input, output) data pairs and is designed such that the model learns to exhibit an unusual prediction behavior on data points from this dataset. 
The unusual behavior can then be used to demonstrate model ownership and identify illegitimate model copies\n", - "\n", - "Let us prepare and inspect the sample Watermark dataset consisting of 100 images = 10 classes (1 for each digit) x 10 images (per class). Watermark images were generated by superimposing a unique pattern (per class) on a noisy background (10 images / class). (Reference - WAFFLE: Watermarking in Federated Learning https://arxiv.org/abs/2008.07298)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bcad2624", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "watermark_dir = \"./files/watermark-dataset/MWAFFLE/\"\n", - "\n", - "\n", - "def generate_watermark(\n", - " x_size=28, y_size=28, num_class=10, num_samples_per_class=10, img_dir=watermark_dir\n", - "):\n", - " \"\"\"\n", - " Generate Watermark by superimposing a pattern on noisy background.\n", - "\n", - " Parameters\n", - " ----------\n", - " x_size: x dimension of the image\n", - " y_size: y dimension of the image\n", - " num_class: number of classes in the original dataset\n", - " num_samples_per_class: number of samples to be generated per class\n", - " img_dir: directory for saving watermark dataset\n", - "\n", - " Reference\n", - " ---------\n", - " WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298)\n", - "\n", - " \"\"\"\n", - " x_pattern = int(x_size * 2 / 3.0 - 1)\n", - " y_pattern = int(y_size * 2 / 3.0 - 1)\n", - "\n", - " np.random.seed(0)\n", - " for cls in range(num_class):\n", - " patterns = []\n", - " random_seed = 10 + cls\n", - " patterns.append(\n", - " ig.Line(\n", - " xdensity=x_pattern,\n", - " ydensity=y_pattern,\n", - " thickness=0.001,\n", - " orientation=np.pi * ng.UniformRandom(seed=random_seed),\n", - " x=ng.UniformRandom(seed=random_seed) - 0.5,\n", - " y=ng.UniformRandom(seed=random_seed) - 0.5,\n", - " scale=0.8,\n", - " )\n", - " )\n", - " patterns.append(\n", - " ig.Arc(\n", - " 
xdensity=x_pattern,\n", - " ydensity=y_pattern,\n", - " thickness=0.001,\n", - " orientation=np.pi * ng.UniformRandom(seed=random_seed),\n", - " x=ng.UniformRandom(seed=random_seed) - 0.5,\n", - " y=ng.UniformRandom(seed=random_seed) - 0.5,\n", - " size=0.33,\n", - " )\n", - " )\n", - "\n", - " pat = np.zeros((x_pattern, y_pattern))\n", - " for i in range(6):\n", - " j = np.random.randint(len(patterns))\n", - " pat += patterns[j]()\n", - " res = pat > 0.5\n", - " pat = res.astype(int)\n", - "\n", - " x_offset = np.random.randint(x_size - x_pattern + 1)\n", - " y_offset = np.random.randint(y_size - y_pattern + 1)\n", - "\n", - " for i in range(num_samples_per_class):\n", - " base = np.random.rand(x_size, y_size)\n", - " # base = np.zeros((x_input, y_input))\n", - " base[\n", - " x_offset : x_offset + pat.shape[0],\n", - " y_offset : y_offset + pat.shape[1],\n", - " ] += pat\n", - " d = np.ones((x_size, x_size))\n", - " img = np.minimum(base, d)\n", - " if not os.path.exists(img_dir + str(cls) + \"/\"):\n", - " os.makedirs(img_dir + str(cls) + \"/\")\n", - " plt.imsave(\n", - " img_dir + str(cls) + \"/wm_\" + str(i + 1) + \".png\",\n", - " img,\n", - " cmap=matplotlib.cm.gray,\n", - " )\n", - "\n", - "\n", - "# If the Watermark dataset does not exist, generate and save the Watermark images\n", - "watermark_path = pathlib.Path(watermark_dir)\n", - "if watermark_path.exists() and watermark_path.is_dir():\n", - " print(\n", - " f\"Watermark dataset already exists at: {watermark_path}. Proceeding to next step ... \"\n", - " )\n", - " pass\n", - "else:\n", - " print(f\"Generating Watermark dataset... 
\")\n", - " generate_watermark()\n", - "\n", - "\n", - "class WatermarkDataset(torch.utils.data.Dataset):\n", - " def __init__(self, images_dir, label_dir=None, transforms=None):\n", - " self.images_dir = os.path.abspath(images_dir)\n", - " self.image_paths = [\n", - " os.path.join(self.images_dir, d) for d in os.listdir(self.images_dir)\n", - " ]\n", - " self.label_paths = label_dir\n", - " self.transform = transforms\n", - " temp = []\n", - "\n", - " # Recursively counting total number of images in the directory\n", - " for image_path in self.image_paths:\n", - " for path in os.walk(image_path):\n", - " if len(path) <= 1:\n", - " continue\n", - " path = path[2]\n", - " for im_n in [image_path + \"/\" + p for p in path]:\n", - " temp.append(im_n)\n", - " self.image_paths = temp\n", - "\n", - " if len(self.image_paths) == 0:\n", - " raise Exception(f\"No file(s) found under {images_dir}\")\n", - "\n", - " def __len__(self):\n", - " return len(self.image_paths)\n", - "\n", - " def __getitem__(self, idx):\n", - " image_filepath = self.image_paths[idx]\n", - " image = Image.open(image_filepath)\n", - " image = image.convert(\"RGB\")\n", - " image = self.transform(image)\n", - " label = int(image_filepath.split(\"/\")[-2])\n", - "\n", - " return image, label\n", - "\n", - "\n", - "def get_watermark_transforms():\n", - " return torchvision.transforms.Compose(\n", - " [\n", - " torchvision.transforms.Grayscale(),\n", - " torchvision.transforms.Resize(28),\n", - " torchvision.transforms.ToTensor(),\n", - " torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)), # Normalize\n", - " ]\n", - " )\n", - "\n", - "\n", - "watermark_data = WatermarkDataset(\n", - " images_dir=watermark_dir,\n", - " transforms=get_watermark_transforms(),\n", - ")\n", - "\n", - "# Set display_watermark to True to display the Watermark dataset\n", - "display_watermark = True\n", - "if display_watermark:\n", - " # Inspect and plot the Watermark Images\n", - " wm_images = np.empty((100, 28, 
28))\n", - " wm_labels = np.empty([100, 1], dtype=int)\n", - "\n", - " for i in range(len(watermark_data)):\n", - " img, label = watermark_data[i]\n", - " wm_labels[label * 10 + i % 10] = label\n", - " wm_images[label * 10 + i % 10, :, :] = img.numpy()\n", - "\n", - " fig = plt.figure(figsize=(120, 120))\n", - " for i in range(100):\n", - " plt.subplot(10, 10, i + 1)\n", - " plt.imshow(wm_images[i], interpolation=\"none\")\n", - " plt.title(\"Label: {}\".format(wm_labels[i]), fontsize=80)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d82d34fd", - "metadata": {}, - "source": [ - "Next we import the `FLSpec`, `LocalRuntime`, placement decorators (`aggregator/collaborator`), and `InspectFlow`.\n", - "\n", - "- `FLSpec` – Defines the flow specification. User defined flows are subclasses of this.\n", - "- `Runtime` – Defines where the flow runs, infrastructure for task transitions (how information gets sent). The `LocalRuntime` runs the flow on a single node.\n", - "- `aggregator/collaborator` - placement decorators that define where the task will be assigned\n", - "- `InspectFlow` – Utility to visualize the User-defined workflow as a Graph (only currently compatible in flows without loops)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "89cf4866", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "from copy import deepcopy\n", - "\n", - "from openfl.experimental.workflow.interface import FLSpec, Aggregator, Collaborator\n", - "from openfl.experimental.workflow.runtime import LocalRuntime\n", - "from openfl.experimental.workflow.placement import aggregator, collaborator\n", - "from openfl.experimental.workflow.utilities.ui import InspectFlow\n", - "\n", - "\n", - "def FedAvg(agg_model, models, weights=None):\n", - " state_dicts = [model.state_dict() for model in models]\n", - " state_dict = agg_model.state_dict()\n", - " for key in models[0].state_dict():\n", - " state_dict[key] = 
torch.from_numpy(np.average([state[key].numpy() for state in state_dicts],\n", - " axis=0, \n", - " weights=weights))\n", - " \n", - " agg_model.load_state_dict(state_dict)\n", - " return agg_model" - ] - }, - { - "attachments": { - "image.png": { - "image/png": "" - } - }, - "cell_type": "markdown", - "id": "c917b085", - "metadata": {}, - "source": [ - "Let us now define the Workflow for Watermark embedding. Here we use the same tasks as the [quickstart](https://github.com/securefederatedai/openfl/blob/develop/openfl-tutorials/experimental/workflow/101_MNIST.ipynb), and define following additional steps for Watermarking\n", - "- PRE-TRAIN (watermark_retrain): At the start (once), initial model is trained on Watermark dataset for a specified number of epochs \n", - "- RE-TRAIN (watermark_pretrain): Every training round, Aggregated model is retrained on Watermark dataset until a desired acc threshold is reached or max number of retrain rounds are expired\n", - "\n", - "Notice that both the PRE-TRAIN and RE-TRAIN tasks are defined as Aggregator processing tasks\n", - "\n", - "![image.png](attachment:image.png)\n", - "\n", - "
Workflow for Watermarking" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "52c4a752", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "class FederatedFlow_MNIST_Watermarking(FLSpec):\n", - " \"\"\"\n", - " This Flow demonstrates Watermarking on a Deep Learning Model in Federated Learning\n", - " Ref: WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298)\n", - " \"\"\"\n", - "\n", - " def __init__(\n", - " self,\n", - " model=None,\n", - " optimizer=None,\n", - " watermark_pretrain_optimizer=None,\n", - " watermark_retrain_optimizer=None,\n", - " round_number=0,\n", - " **kwargs,\n", - " ):\n", - " super().__init__(**kwargs)\n", - "\n", - " if model is not None:\n", - " self.model = model\n", - " self.optimizer = optimizer\n", - " self.watermark_pretrain_optimizer = watermark_pretrain_optimizer\n", - " self.watermark_retrain_optimizer = watermark_retrain_optimizer\n", - " else:\n", - " self.model = Net()\n", - " self.optimizer = optim.SGD(\n", - " self.model.parameters(), lr=learning_rate, momentum=momentum\n", - " )\n", - " self.watermark_pretrain_optimizer = optim.SGD(\n", - " self.model.parameters(),\n", - " lr=watermark_pretrain_learning_rate,\n", - " momentum=watermark_pretrain_momentum,\n", - " weight_decay=watermark_pretrain_weight_decay,\n", - " )\n", - " self.watermark_retrain_optimizer = optim.SGD(\n", - " self.model.parameters(), lr=watermark_retrain_learning_rate\n", - " )\n", - " self.round_number = round_number\n", - " self.watermark_pretraining_completed = False\n", - "\n", - " @aggregator\n", - " def start(self):\n", - " \"\"\"\n", - " This is the start of the Flow.\n", - " \"\"\"\n", - "\n", - " print(f\": Start of flow ... 
\")\n", - " self.collaborators = self.runtime.collaborators\n", - "\n", - " # Randomly select a fraction of actual collaborator every round\n", - " fraction = 0.5\n", - " if int(fraction * len(self.collaborators)) < 1:\n", - " raise Exception(\n", - " f\"Cannot run training with {fraction*100}% selected collaborators out of {len(self.collaborators)} Collaborators. Atleast one collaborator is required to run the training\"\n", - " )\n", - " self.subset_collaborators = random.sample(\n", - " self.collaborators, int(fraction * (len(self.collaborators)))\n", - " )\n", - "\n", - " self.next(self.watermark_pretrain)\n", - "\n", - " @aggregator\n", - " def watermark_pretrain(self):\n", - " \"\"\"\n", - " Pre-Train the Model before starting Federated Learning.\n", - " \"\"\"\n", - " if not self.watermark_pretraining_completed:\n", - "\n", - " print(\": Performing Watermark Pre-training\")\n", - "\n", - " for i in range(self.pretrain_epochs):\n", - "\n", - " watermark_pretrain_loss = train_model(\n", - " self.model,\n", - " self.watermark_pretrain_optimizer,\n", - " self.watermark_data_loader,\n", - " \":\",\n", - " i,\n", - " log=False,\n", - " )\n", - " watermark_pretrain_validation_score = inference(\n", - " self.model, self.watermark_data_loader\n", - " )\n", - "\n", - " print(\n", - " \": Watermark Pretraining: Round: {:<3} Loss: {:<.6f} Acc: {:<.6f}\".format(\n", - " i,\n", - " watermark_pretrain_loss,\n", - " watermark_pretrain_validation_score,\n", - " )\n", - " )\n", - "\n", - " self.watermark_pretraining_completed = True\n", - "\n", - " self.next(\n", - " self.aggregated_model_validation,\n", - " foreach=\"subset_collaborators\",\n", - " exclude=[\"watermark_pretrain_optimizer\", \"watermark_retrain_optimizer\"],\n", - " )\n", - "\n", - " @collaborator\n", - " def aggregated_model_validation(self):\n", - " \"\"\"\n", - " Perform Aggregated Model validation on Collaborators.\n", - " \"\"\"\n", - " self.agg_validation_score = inference(self.model, 
self.test_loader)\n", - " print(\n", - " f\" Aggregated Model validation score = {self.agg_validation_score}\"\n", - " )\n", - "\n", - " self.next(self.train)\n", - "\n", - " @collaborator\n", - " def train(self):\n", - " \"\"\"\n", - " Train model on Local collab dataset.\n", - "\n", - " \"\"\"\n", - " print(\": Performing Model Training on Local dataset ... \")\n", - "\n", - " self.optimizer = optim.SGD(\n", - " self.model.parameters(), lr=learning_rate, momentum=momentum\n", - " )\n", - "\n", - " self.loss = train_model(\n", - " self.model,\n", - " self.optimizer,\n", - " self.train_loader,\n", - " \"\"),\n", - " self.round_number if self.round_number is not None else 0,\n", - " log=True,\n", - " )\n", - "\n", - " self.next(self.local_model_validation)\n", - "\n", - " @collaborator\n", - " def local_model_validation(self):\n", - " \"\"\"\n", - " Validate locally trained model.\n", - "\n", - " \"\"\"\n", - " self.local_validation_score = inference(self.model, self.test_loader)\n", - " print(\n", - " f\" Local model validation score = {self.local_validation_score}\"\n", - " )\n", - " self.next(self.join)\n", - "\n", - " @aggregator\n", - " def join(self, inputs):\n", - " \"\"\"\n", - " Model aggregation step.\n", - " \"\"\"\n", - "\n", - " self.average_loss = sum(input.loss for input in inputs) / len(inputs)\n", - " self.aggregated_model_accuracy = sum(\n", - " input.agg_validation_score for input in inputs\n", - " ) / len(inputs)\n", - " self.local_model_accuracy = sum(\n", - " input.local_validation_score for input in inputs\n", - " ) / len(inputs)\n", - "\n", - " print(f\": Joining models from collaborators...\")\n", - "\n", - " print(\n", - " f\" Aggregated model validation score = {self.aggregated_model_accuracy}\"\n", - " )\n", - " print(f\" Average training loss = {self.average_loss}\")\n", - " print(f\" Average local model validation values = {self.local_model_accuracy}\")\n", - "\n", - " self.model = FedAvg(self.model, [input.model for input in 
inputs])\n", - "\n", - " self.next(self.watermark_retrain)\n", - "\n", - " @aggregator\n", - " def watermark_retrain(self):\n", - " \"\"\"\n", - " Retrain the aggregated model.\n", - "\n", - " \"\"\"\n", - " print(\": Performing Watermark Retraining ... \")\n", - " self.watermark_retrain_optimizer = optim.SGD(\n", - " self.model.parameters(), lr=watermark_retrain_learning_rate\n", - " )\n", - "\n", - " retrain_round = 0\n", - "\n", - " # Perform re-training until (accuracy >= acc_threshold) or (retrain_round > number of retrain_epochs)\n", - " self.watermark_retrain_validation_score = inference(\n", - " self.model, self.watermark_data_loader\n", - " )\n", - " while (\n", - " self.watermark_retrain_validation_score < self.watermark_acc_threshold\n", - " ) and (retrain_round < self.retrain_epochs):\n", - " self.watermark_retrain_train_loss = train_model(\n", - " self.model,\n", - " self.watermark_retrain_optimizer,\n", - " self.watermark_data_loader,\n", - " \"\",\n", - " retrain_round,\n", - " log=False,\n", - " )\n", - " self.watermark_retrain_validation_score = inference(\n", - " self.model, self.watermark_data_loader\n", - " )\n", - "\n", - " print(\n", - " \": Watermark Retraining: Train Epoch: {:<3} Retrain Round: {:<3} Loss: {:<.6f}, Acc: {:<.6f}\".format(\n", - " self.round_number,\n", - " retrain_round,\n", - " self.watermark_retrain_train_loss,\n", - " self.watermark_retrain_validation_score,\n", - " )\n", - " )\n", - "\n", - " retrain_round += 1\n", - "\n", - " self.next(self.end)\n", - "\n", - " @aggregator\n", - " def end(self):\n", - " \"\"\"\n", - " This is the last step in the Flow.\n", - "\n", - " \"\"\"\n", - " print(f\"This is the end of the flow\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c6da2c42", - "metadata": {}, - "source": [ - "In the `FederatedFlow_MNIST_Watermarking` definition above, you will notice that certain attributes of the flow were not initialized, namely the `watermark_data_loader` for Aggregator and 
`train_loader`, `test_loader` for the Collaborators. \n", - "\n", - "- Collaborator attributes are created in the same manner as described in [quickstart](https://github.com/securefederatedai/openfl/blob/develop/openfl-tutorials/experimental/workflow/101_MNIST.ipynb)\n", - "\n", - "- `watermark_data_loader` is created as a **private attribute** of the Aggregator which is set by `callable_to_initialize_aggregator_private_attributes` callable function. It is exposed only via the runtime. This property enables the Watermark dataset to be hidden from the collaborators as Aggregator private attributes are filtered before the state is transferred to Collaborators (in the same manner as Collaborator private attributes are hidden from Aggregator)\n", - "\n", - "Lets define these attributes along with some other parameters (seed, batch-sizes, optimizer parameters) and create the LocalRuntime" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bffcc141", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "# Set random seed\n", - "random_seed = 42\n", - "torch.manual_seed(random_seed)\n", - "np.random.seed(random_seed)\n", - "torch.backends.cudnn.enabled = False\n", - "\n", - "# Batch sizes\n", - "batch_size_train = 64\n", - "batch_size_test = 64\n", - "batch_size_watermark = 50\n", - "\n", - "# MNIST parameters\n", - "learning_rate = 5e-2\n", - "momentum = 5e-1\n", - "log_interval = 20\n", - "\n", - "# Watermarking parameters\n", - "watermark_pretrain_learning_rate = 1e-1\n", - "watermark_pretrain_momentum = 5e-1\n", - "watermark_pretrain_weight_decay = 5e-05\n", - "watermark_retrain_learning_rate = 5e-3" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "3d7ce52f", - "metadata": {}, - "source": [ - "## Setup Federation\n", - "\n", - "Private attributes can be set using callback function while instantiating the participant. 
Parameters required by the callback function are specified as arguments while instantiating the participant. In this example callback function, there are 2 callable function namely `callable_to_initialize_aggregator_private_attributes`, and `callable_to_initialize_collaborator_private_attributes`, returns the private attributes respectively for aggregator and collaborator.\n", - "\n", - "\n", - "Aggregator callable function `callable_to_initialize_aggregator_private_attributes` returns `watermark_data_loader`, `pretrain_epochs`, `retrain_epochs`, `watermark_acc_threshold`, and `watermark_pretraining_completed`. Collaborator callable function `callable_to_initialize_aggregator_private_attributes` returns `train_loader` and `test_loader` of the collaborator." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c5f6e104", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def callable_to_initialize_aggregator_private_attributes(watermark_data, batch_size):\n", - " return {\n", - " \"watermark_data_loader\": torch.utils.data.DataLoader(\n", - " watermark_data, batch_size=batch_size, shuffle=True\n", - " ),\n", - " \"pretrain_epochs\": 25,\n", - " \"retrain_epochs\": 25,\n", - " \"watermark_acc_threshold\": 0.98,\n", - " }\n", - "\n", - "# Setup Aggregator private attributes via callable function\n", - "aggregator = Aggregator(\n", - " name=\"agg\",\n", - " private_attributes_callable=callable_to_initialize_aggregator_private_attributes,\n", - " watermark_data=watermark_data,\n", - " batch_size=batch_size_watermark,\n", - " )\n", - "\n", - "collaborator_names = [\n", - " \"Portland\",\n", - " \"Seattle\",\n", - " \"Chandler\",\n", - " \"Bangalore\",\n", - " \"New Delhi\",\n", - "]\n", - "\n", - "def callable_to_initialize_collaborator_private_attributes(index, n_collaborators, batch_size, train_dataset, test_dataset):\n", - " train = deepcopy(train_dataset)\n", - " test = deepcopy(test_dataset)\n", - " train.data = 
train_dataset.data[index::n_collaborators]\n", - " train.targets = train_dataset.targets[index::n_collaborators]\n", - " test.data = test_dataset.data[index::n_collaborators]\n", - " test.targets = test_dataset.targets[index::n_collaborators]\n", - "\n", - " return {\n", - " \"train_loader\": torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True),\n", - " \"test_loader\": torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True),\n", - " }\n", - "\n", - "# Setup Collaborators private attributes via callable function\n", - "collaborators = []\n", - "for idx, collaborator_name in enumerate(collaborator_names):\n", - " collaborators.append(\n", - " Collaborator(\n", - " name=collaborator_name, num_cpus=0, num_gpus=0,\n", - " private_attributes_callable=callable_to_initialize_collaborator_private_attributes,\n", - " index=idx, n_collaborators=len(collaborator_names),\n", - " train_dataset=mnist_train, test_dataset=mnist_test, batch_size=64\n", - " )\n", - " )\n", - "\n", - "local_runtime = LocalRuntime(aggregator=aggregator, collaborators=collaborators, backend=\"ray\")\n", - "print(f\"Local runtime collaborators = {local_runtime.collaborators}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "02935ccf", - "metadata": {}, - "source": [ - "Now that we have our flow and runtime defined, let's run the experiment! 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c6d19819", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "model = Net()\n", - "optimizer = optim.SGD(\n", - " model.parameters(), lr=learning_rate, momentum=momentum\n", - ")\n", - "watermark_pretrain_optimizer = optim.SGD(\n", - " model.parameters(),\n", - " lr=watermark_pretrain_learning_rate,\n", - " momentum=watermark_pretrain_momentum,\n", - " weight_decay=watermark_pretrain_weight_decay,\n", - ")\n", - "watermark_retrain_optimizer = optim.SGD(\n", - " model.parameters(), lr=watermark_retrain_learning_rate\n", - ")\n", - "best_model = None\n", - "round_number = 0\n", - "top_model_accuracy = 0\n", - "\n", - "flflow = FederatedFlow_MNIST_Watermarking(\n", - " model,\n", - " optimizer,\n", - " watermark_pretrain_optimizer,\n", - " watermark_retrain_optimizer,\n", - " round_number,\n", - " checkpoint=True,\n", - ")\n", - "flflow.runtime = local_runtime\n", - "for i in range(1):\n", - " print(f\"Starting round {i}...\")\n", - " flflow.run()\n", - " flflow.round_number += 1\n", - " if hasattr(flflow, \"aggregated_model_accuracy\"):\n", - " aggregated_model_accuracy = flflow.aggregated_model_accuracy\n", - " if aggregated_model_accuracy > top_model_accuracy:\n", - " print(\n", - " f\"\\nAccuracy improved to {aggregated_model_accuracy} for round {i}, Watermark Acc: {flflow.watermark_retrain_validation_score}\\n\"\n", - " )\n", - " top_model_accuracy = aggregated_model_accuracy\n", - " best_model = flflow.model\n", - "\n", - " torch.save(best_model.state_dict(), \"watermarked_mnist_model.pth\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "env-workspace-builder-openfl", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - 
"version": "3.8.19" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/data.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/data.yaml deleted file mode 100644 index f39d623fc6..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/data.yaml +++ /dev/null @@ -1,51 +0,0 @@ -Bangalore: - callable_func: - settings: - batch_size: 64 - index: 3 - n_collaborators: 5 - test_dataset: src.experiment.mnist_test - train_dataset: src.experiment.mnist_train - template: src.experiment.callable_to_initialize_collaborator_private_attributes -Chandler: - callable_func: - settings: - batch_size: 64 - index: 2 - n_collaborators: 5 - test_dataset: src.experiment.mnist_test - train_dataset: src.experiment.mnist_train - template: src.experiment.callable_to_initialize_collaborator_private_attributes -New Delhi: - callable_func: - settings: - batch_size: 64 - index: 4 - n_collaborators: 5 - test_dataset: src.experiment.mnist_test - train_dataset: src.experiment.mnist_train - template: src.experiment.callable_to_initialize_collaborator_private_attributes -Portland: - callable_func: - settings: - batch_size: 64 - index: 0 - n_collaborators: 5 - test_dataset: src.experiment.mnist_test - train_dataset: src.experiment.mnist_train - template: src.experiment.callable_to_initialize_collaborator_private_attributes -Seattle: - callable_func: - settings: - batch_size: 64 - index: 1 - n_collaborators: 5 - test_dataset: src.experiment.mnist_test - train_dataset: src.experiment.mnist_train - template: src.experiment.callable_to_initialize_collaborator_private_attributes -aggregator: - callable_func: - settings: - batch_size: 50 - watermark_data: src.experiment.watermark_data - template: src.experiment.callable_to_initialize_aggregator_private_attributes diff --git 
a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/plan.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/plan.yaml deleted file mode 100644 index c9bea91dfa..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/plan.yaml +++ /dev/null @@ -1,20 +0,0 @@ -aggregator: - defaults: plan/defaults/aggregator.yaml - settings: - rounds_to_train: 1 - template: openfl.experimental.workflow.component.Aggregator -collaborator: - defaults: plan/defaults/collaborator.yaml - settings: {} - template: openfl.experimental.workflow.component.Collaborator -federated_flow: - settings: - checkpoint: true - model: src.experiment.model - optimizer: src.experiment.optimizer - round_number: 0 - watermark_pretrain_optimizer: src.experiment.watermark_pretrain_optimizer - watermark_retrain_optimizer: src.experiment.watermark_retrain_optimizer - template: src.experiment.FederatedFlow_MNIST_Watermarking -network: - defaults: plan/defaults/network.yaml diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/requirements.txt b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/requirements.txt deleted file mode 100644 index 8946ff2cac..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability -torch -torchvision -matplotlib -git+https://github.com/pyviz-topics/imagen.git@master -holoviews==1.15.4 diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/experiment.py b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/experiment.py deleted file mode 100644 index a984387881..0000000000 --- 
a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/experiment.py +++ /dev/null @@ -1,664 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../../301_MNIST_Watermarking.ipynb. - -# %% auto 0 -__all__ = ['random_seed', 'mnist_train', 'mnist_test', 'watermark_dir', 'watermark_path', 'watermark_data', 'display_watermark', - 'batch_size_train', 'batch_size_test', 'batch_size_watermark', 'learning_rate', 'momentum', 'log_interval', - 'watermark_pretrain_learning_rate', 'watermark_pretrain_momentum', 'watermark_pretrain_weight_decay', - 'watermark_retrain_learning_rate', 'aggregator', 'collaborator_names', 'collaborators', 'local_runtime', - 'model', 'optimizer', 'watermark_pretrain_optimizer', 'watermark_retrain_optimizer', 'best_model', - 'round_number', 'top_model_accuracy', 'flflow', 'Net', 'inference', 'train_model', 'generate_watermark', - 'WatermarkDataset', 'get_watermark_transforms', 'FedAvg', 'FederatedFlow_MNIST_Watermarking', - 'callable_to_initialize_aggregator_private_attributes', - 'callable_to_initialize_collaborator_private_attributes'] - -# %% ../../../301_MNIST_Watermarking.ipynb 7 - - -# Uncomment this if running in Google Colab -#import os -#os.environ["USERNAME"] = "colab" - -# %% ../../../301_MNIST_Watermarking.ipynb 9 -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -import torch -import torchvision -import numpy as np -import random -import pathlib -import os -import matplotlib -import matplotlib.pyplot as plt -import PIL.Image as Image -import imagen as ig -import numbergen as ng - -random_seed = 1 -torch.backends.cudnn.enabled = False -torch.manual_seed(random_seed) - -# MNIST Train and Test datasets -mnist_train = torchvision.datasets.MNIST( - "./files/", - train=True, - download=True, - transform=torchvision.transforms.Compose( - [ - torchvision.transforms.ToTensor(), - torchvision.transforms.Normalize((0.1307,), (0.3081,)), - ] - ), -) - -mnist_test = 
torchvision.datasets.MNIST( - "./files/", - train=False, - download=True, - transform=torchvision.transforms.Compose( - [ - torchvision.transforms.ToTensor(), - torchvision.transforms.Normalize((0.1307,), (0.3081,)), - ] - ), -) - - -class Net(nn.Module): - def __init__(self, dropout=0.0): - super(Net, self).__init__() - self.dropout = dropout - self.block = nn.Sequential( - nn.Conv2d(1, 32, 2), - nn.MaxPool2d(2), - nn.ReLU(), - nn.Conv2d(32, 64, 2), - nn.MaxPool2d(2), - nn.ReLU(), - nn.Conv2d(64, 128, 2), - nn.ReLU(), - ) - self.fc1 = nn.Linear(128 * 5**2, 200) - self.fc2 = nn.Linear(200, 10) - self.relu = nn.ReLU() - self.dropout = nn.Dropout(p=dropout) - - def forward(self, x): - x = self.dropout(x) - out = self.block(x) - out = out.view(-1, 128 * 5**2) - out = self.dropout(out) - out = self.relu(self.fc1(out)) - out = self.dropout(out) - out = self.fc2(out) - return F.log_softmax(out, 1) - - -def inference(network, test_loader): - network.eval() - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - output = network(data) - pred = output.data.max(1, keepdim=True)[1] - correct += pred.eq(target.data.view_as(pred)).sum() - accuracy = float(correct / len(test_loader.dataset)) - return accuracy - - -def train_model(model, optimizer, data_loader, entity, round_number, log=False): - # Helper function to train the model - train_loss = 0 - log_interval = 20 - model.train() - for batch_idx, (X, y) in enumerate(data_loader): - optimizer.zero_grad() - - output = model(X) - loss = F.nll_loss(output, y) - loss.backward() - - optimizer.step() - - train_loss += loss.item() * len(X) - if batch_idx % log_interval == 0 and log: - print("{:<20} Train Epoch: {:<3} [{:<3}/{:<4} ({:<.0f}%)] Loss: {:<.6f}".format( - entity, - round_number, - batch_idx * len(X), - len(data_loader.dataset), - 100.0 * batch_idx / len(data_loader), - loss.item(), - ) - ) - train_loss /= len(data_loader.dataset) - return train_loss - -# %% ../../../301_MNIST_Watermarking.ipynb 11 
-watermark_dir = "./files/watermark-dataset/MWAFFLE/" - - -def generate_watermark( - x_size=28, y_size=28, num_class=10, num_samples_per_class=10, img_dir=watermark_dir -): - """ - Generate Watermark by superimposing a pattern on noisy background. - - Parameters - ---------- - x_size: x dimension of the image - y_size: y dimension of the image - num_class: number of classes in the original dataset - num_samples_per_class: number of samples to be generated per class - img_dir: directory for saving watermark dataset - - Reference - --------- - WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) - - """ - x_pattern = int(x_size * 2 / 3.0 - 1) - y_pattern = int(y_size * 2 / 3.0 - 1) - - np.random.seed(0) - for cls in range(num_class): - patterns = [] - random_seed = 10 + cls - patterns.append( - ig.Line( - xdensity=x_pattern, - ydensity=y_pattern, - thickness=0.001, - orientation=np.pi * ng.UniformRandom(seed=random_seed), - x=ng.UniformRandom(seed=random_seed) - 0.5, - y=ng.UniformRandom(seed=random_seed) - 0.5, - scale=0.8, - ) - ) - patterns.append( - ig.Arc( - xdensity=x_pattern, - ydensity=y_pattern, - thickness=0.001, - orientation=np.pi * ng.UniformRandom(seed=random_seed), - x=ng.UniformRandom(seed=random_seed) - 0.5, - y=ng.UniformRandom(seed=random_seed) - 0.5, - size=0.33, - ) - ) - - pat = np.zeros((x_pattern, y_pattern)) - for i in range(6): - j = np.random.randint(len(patterns)) - pat += patterns[j]() - res = pat > 0.5 - pat = res.astype(int) - - x_offset = np.random.randint(x_size - x_pattern + 1) - y_offset = np.random.randint(y_size - y_pattern + 1) - - for i in range(num_samples_per_class): - base = np.random.rand(x_size, y_size) - # base = np.zeros((x_input, y_input)) - base[ - x_offset : x_offset + pat.shape[0], - y_offset : y_offset + pat.shape[1], - ] += pat - d = np.ones((x_size, x_size)) - img = np.minimum(base, d) - if not os.path.exists(img_dir + str(cls) + "/"): - os.makedirs(img_dir + str(cls) + "/") - plt.imsave( - 
img_dir + str(cls) + "/wm_" + str(i + 1) + ".png", - img, - cmap=matplotlib.cm.gray, - ) - - -# If the Watermark dataset does not exist, generate and save the Watermark images -watermark_path = pathlib.Path(watermark_dir) -if watermark_path.exists() and watermark_path.is_dir(): - print( - f"Watermark dataset already exists at: {watermark_path}. Proceeding to next step ... " - ) - pass -else: - print(f"Generating Watermark dataset... ") - generate_watermark() - - -class WatermarkDataset(torch.utils.data.Dataset): - def __init__(self, images_dir, label_dir=None, transforms=None): - self.images_dir = os.path.abspath(images_dir) - self.image_paths = [ - os.path.join(self.images_dir, d) for d in os.listdir(self.images_dir) - ] - self.label_paths = label_dir - self.transform = transforms - temp = [] - - # Recursively counting total number of images in the directory - for image_path in self.image_paths: - for path in os.walk(image_path): - if len(path) <= 1: - continue - path = path[2] - for im_n in [image_path + "/" + p for p in path]: - temp.append(im_n) - self.image_paths = temp - - if len(self.image_paths) == 0: - raise Exception(f"No file(s) found under {images_dir}") - - def __len__(self): - return len(self.image_paths) - - def __getitem__(self, idx): - image_filepath = self.image_paths[idx] - image = Image.open(image_filepath) - image = image.convert("RGB") - image = self.transform(image) - label = int(image_filepath.split("/")[-2]) - - return image, label - - -def get_watermark_transforms(): - return torchvision.transforms.Compose( - [ - torchvision.transforms.Grayscale(), - torchvision.transforms.Resize(28), - torchvision.transforms.ToTensor(), - torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)), # Normalize - ] - ) - - -watermark_data = WatermarkDataset( - images_dir=watermark_dir, - transforms=get_watermark_transforms(), -) - -# Set display_watermark to True to display the Watermark dataset -display_watermark = True -if display_watermark: - # Inspect 
and plot the Watermark Images - wm_images = np.empty((100, 28, 28)) - wm_labels = np.empty([100, 1], dtype=int) - - for i in range(len(watermark_data)): - img, label = watermark_data[i] - wm_labels[label * 10 + i % 10] = label - wm_images[label * 10 + i % 10, :, :] = img.numpy() - - fig = plt.figure(figsize=(120, 120)) - for i in range(100): - plt.subplot(10, 10, i + 1) - plt.imshow(wm_images[i], interpolation="none") - plt.title("Label: {}".format(wm_labels[i]), fontsize=80) - -# %% ../../../301_MNIST_Watermarking.ipynb 13 -from copy import deepcopy - -from openfl.experimental.workflow.interface import FLSpec, Aggregator, Collaborator -from openfl.experimental.workflow.runtime import LocalRuntime -from openfl.experimental.workflow.placement import aggregator, collaborator -from openfl.experimental.workflow.utilities.ui import InspectFlow - - -def FedAvg(agg_model, models, weights=None): - state_dicts = [model.state_dict() for model in models] - state_dict = agg_model.state_dict() - for key in models[0].state_dict(): - state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in state_dicts], - axis=0, - weights=weights)) - - agg_model.load_state_dict(state_dict) - return agg_model - -# %% ../../../301_MNIST_Watermarking.ipynb 15 -class FederatedFlow_MNIST_Watermarking(FLSpec): - """ - This Flow demonstrates Watermarking on a Deep Learning Model in Federated Learning - Ref: WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) - """ - - def __init__( - self, - model=None, - optimizer=None, - watermark_pretrain_optimizer=None, - watermark_retrain_optimizer=None, - round_number=0, - **kwargs, - ): - super().__init__(**kwargs) - - if model is not None: - self.model = model - self.optimizer = optimizer - self.watermark_pretrain_optimizer = watermark_pretrain_optimizer - self.watermark_retrain_optimizer = watermark_retrain_optimizer - else: - self.model = Net() - self.optimizer = optim.SGD( - self.model.parameters(), 
lr=learning_rate, momentum=momentum - ) - self.watermark_pretrain_optimizer = optim.SGD( - self.model.parameters(), - lr=watermark_pretrain_learning_rate, - momentum=watermark_pretrain_momentum, - weight_decay=watermark_pretrain_weight_decay, - ) - self.watermark_retrain_optimizer = optim.SGD( - self.model.parameters(), lr=watermark_retrain_learning_rate - ) - self.round_number = round_number - self.watermark_pretraining_completed = False - - @aggregator - def start(self): - """ - This is the start of the Flow. - """ - - print(f": Start of flow ... ") - self.collaborators = self.runtime.collaborators - - # Randomly select a fraction of actual collaborator every round - fraction = 0.5 - if int(fraction * len(self.collaborators)) < 1: - raise Exception( - f"Cannot run training with {fraction*100}% selected collaborators out of {len(self.collaborators)} Collaborators. Atleast one collaborator is required to run the training" - ) - self.subset_collaborators = random.sample( - self.collaborators, int(fraction * (len(self.collaborators))) - ) - - self.next(self.watermark_pretrain) - - @aggregator - def watermark_pretrain(self): - """ - Pre-Train the Model before starting Federated Learning. 
- """ - if not self.watermark_pretraining_completed: - - print(": Performing Watermark Pre-training") - - for i in range(self.pretrain_epochs): - - watermark_pretrain_loss = train_model( - self.model, - self.watermark_pretrain_optimizer, - self.watermark_data_loader, - ":", - i, - log=False, - ) - watermark_pretrain_validation_score = inference( - self.model, self.watermark_data_loader - ) - - print( - ": Watermark Pretraining: Round: {:<3} Loss: {:<.6f} Acc: {:<.6f}".format( - i, - watermark_pretrain_loss, - watermark_pretrain_validation_score, - ) - ) - - self.watermark_pretraining_completed = True - - self.next( - self.aggregated_model_validation, - foreach="subset_collaborators", - exclude=["watermark_pretrain_optimizer", "watermark_retrain_optimizer"], - ) - - @collaborator - def aggregated_model_validation(self): - """ - Perform Aggregated Model validation on Collaborators. - """ - self.agg_validation_score = inference(self.model, self.test_loader) - print( - f" Aggregated Model validation score = {self.agg_validation_score}" - ) - - self.next(self.train) - - @collaborator - def train(self): - """ - Train model on Local collab dataset. - - """ - print(": Performing Model Training on Local dataset ... ") - - self.optimizer = optim.SGD( - self.model.parameters(), lr=learning_rate, momentum=momentum - ) - - self.loss = train_model( - self.model, - self.optimizer, - self.train_loader, - ""), - self.round_number if self.round_number is not None else 0, - log=True, - ) - - self.next(self.local_model_validation) - - @collaborator - def local_model_validation(self): - """ - Validate locally trained model. - - """ - self.local_validation_score = inference(self.model, self.test_loader) - print( - f" Local model validation score = {self.local_validation_score}" - ) - self.next(self.join) - - @aggregator - def join(self, inputs): - """ - Model aggregation step. 
- """ - - self.average_loss = sum(input.loss for input in inputs) / len(inputs) - self.aggregated_model_accuracy = sum( - input.agg_validation_score for input in inputs - ) / len(inputs) - self.local_model_accuracy = sum( - input.local_validation_score for input in inputs - ) / len(inputs) - - print(f": Joining models from collaborators...") - - print( - f" Aggregated model validation score = {self.aggregated_model_accuracy}" - ) - print(f" Average training loss = {self.average_loss}") - print(f" Average local model validation values = {self.local_model_accuracy}") - - self.model = FedAvg(self.model, [input.model for input in inputs]) - - self.next(self.watermark_retrain) - - @aggregator - def watermark_retrain(self): - """ - Retrain the aggregated model. - - """ - print(": Performing Watermark Retraining ... ") - self.watermark_retrain_optimizer = optim.SGD( - self.model.parameters(), lr=watermark_retrain_learning_rate - ) - - retrain_round = 0 - - # Perform re-training until (accuracy >= acc_threshold) or (retrain_round > number of retrain_epochs) - self.watermark_retrain_validation_score = inference( - self.model, self.watermark_data_loader - ) - while ( - self.watermark_retrain_validation_score < self.watermark_acc_threshold - ) and (retrain_round < self.retrain_epochs): - self.watermark_retrain_train_loss = train_model( - self.model, - self.watermark_retrain_optimizer, - self.watermark_data_loader, - "", - retrain_round, - log=False, - ) - self.watermark_retrain_validation_score = inference( - self.model, self.watermark_data_loader - ) - - print( - ": Watermark Retraining: Train Epoch: {:<3} Retrain Round: {:<3} Loss: {:<.6f}, Acc: {:<.6f}".format( - self.round_number, - retrain_round, - self.watermark_retrain_train_loss, - self.watermark_retrain_validation_score, - ) - ) - - retrain_round += 1 - - self.next(self.end) - - @aggregator - def end(self): - """ - This is the last step in the Flow. 
- - """ - print(f"This is the end of the flow") - -# %% ../../../301_MNIST_Watermarking.ipynb 17 -# Set random seed -random_seed = 42 -torch.manual_seed(random_seed) -np.random.seed(random_seed) -torch.backends.cudnn.enabled = False - -# Batch sizes -batch_size_train = 64 -batch_size_test = 64 -batch_size_watermark = 50 - -# MNIST parameters -learning_rate = 5e-2 -momentum = 5e-1 -log_interval = 20 - -# Watermarking parameters -watermark_pretrain_learning_rate = 1e-1 -watermark_pretrain_momentum = 5e-1 -watermark_pretrain_weight_decay = 5e-05 -watermark_retrain_learning_rate = 5e-3 - -# %% ../../../301_MNIST_Watermarking.ipynb 19 -def callable_to_initialize_aggregator_private_attributes(watermark_data, batch_size): - return { - "watermark_data_loader": torch.utils.data.DataLoader( - watermark_data, batch_size=batch_size, shuffle=True - ), - "pretrain_epochs": 25, - "retrain_epochs": 25, - "watermark_acc_threshold": 0.98, - } - -# Setup Aggregator private attributes via callable function -aggregator = Aggregator( - name="agg", - private_attributes_callable=callable_to_initialize_aggregator_private_attributes, - watermark_data=watermark_data, - batch_size=batch_size_watermark, - ) - -collaborator_names = [ - "Portland", - "Seattle", - "Chandler", - "Bangalore", - "New Delhi", -] - -def callable_to_initialize_collaborator_private_attributes(index, n_collaborators, batch_size, train_dataset, test_dataset): - train = deepcopy(train_dataset) - test = deepcopy(test_dataset) - train.data = train_dataset.data[index::n_collaborators] - train.targets = train_dataset.targets[index::n_collaborators] - test.data = test_dataset.data[index::n_collaborators] - test.targets = test_dataset.targets[index::n_collaborators] - - return { - "train_loader": torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True), - "test_loader": torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True), - } - -# Setup Collaborators private attributes via callable function 
-collaborators = [] -for idx, collaborator_name in enumerate(collaborator_names): - collaborators.append( - Collaborator( - name=collaborator_name, num_cpus=0, num_gpus=0, - private_attributes_callable=callable_to_initialize_collaborator_private_attributes, - index=idx, n_collaborators=len(collaborator_names), - train_dataset=mnist_train, test_dataset=mnist_test, batch_size=64 - ) - ) - -local_runtime = LocalRuntime(aggregator=aggregator, collaborators=collaborators, backend="single_process") -print(f"Local runtime collaborators = {local_runtime.collaborators}") - -# %% ../../../301_MNIST_Watermarking.ipynb 21 -model = Net() -optimizer = optim.SGD( - model.parameters(), lr=learning_rate, momentum=momentum -) -watermark_pretrain_optimizer = optim.SGD( - model.parameters(), - lr=watermark_pretrain_learning_rate, - momentum=watermark_pretrain_momentum, - weight_decay=watermark_pretrain_weight_decay, -) -watermark_retrain_optimizer = optim.SGD( - model.parameters(), lr=watermark_retrain_learning_rate -) -best_model = None -round_number = 0 -top_model_accuracy = 0 - -flflow = FederatedFlow_MNIST_Watermarking( - model, - optimizer, - watermark_pretrain_optimizer, - watermark_retrain_optimizer, - round_number, - checkpoint=True, -) -flflow.runtime = local_runtime -for i in range(1): - print(f"Starting round {i}...") -# flflow.run() - flflow.round_number += 1 - if hasattr(flflow, "aggregated_model_accuracy"): - aggregated_model_accuracy = flflow.aggregated_model_accuracy - if aggregated_model_accuracy > top_model_accuracy: - print( - f"\nAccuracy improved to {aggregated_model_accuracy} for round {i}, Watermark Acc: {flflow.watermark_retrain_validation_score}\n" - ) - top_model_accuracy = aggregated_model_accuracy - best_model = flflow.model - - torch.save(best_model.state_dict(), "watermarked_mnist_model.pth") diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/cols.yaml 
b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/cols.yaml deleted file mode 100644 index 95307de3bc..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/cols.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -collaborators: - \ No newline at end of file diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml deleted file mode 100644 index f39d623fc6..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml +++ /dev/null @@ -1,51 +0,0 @@ -Bangalore: - callable_func: - settings: - batch_size: 64 - index: 3 - n_collaborators: 5 - test_dataset: src.experiment.mnist_test - train_dataset: src.experiment.mnist_train - template: src.experiment.callable_to_initialize_collaborator_private_attributes -Chandler: - callable_func: - settings: - batch_size: 64 - index: 2 - n_collaborators: 5 - test_dataset: src.experiment.mnist_test - train_dataset: src.experiment.mnist_train - template: src.experiment.callable_to_initialize_collaborator_private_attributes -New Delhi: - callable_func: - settings: - batch_size: 64 - index: 4 - n_collaborators: 5 - test_dataset: src.experiment.mnist_test - train_dataset: src.experiment.mnist_train - template: src.experiment.callable_to_initialize_collaborator_private_attributes -Portland: - callable_func: - settings: - batch_size: 64 - index: 0 - n_collaborators: 5 - test_dataset: src.experiment.mnist_test - train_dataset: src.experiment.mnist_train - template: src.experiment.callable_to_initialize_collaborator_private_attributes -Seattle: - callable_func: - 
settings: - batch_size: 64 - index: 1 - n_collaborators: 5 - test_dataset: src.experiment.mnist_test - train_dataset: src.experiment.mnist_train - template: src.experiment.callable_to_initialize_collaborator_private_attributes -aggregator: - callable_func: - settings: - batch_size: 50 - watermark_data: src.experiment.watermark_data - template: src.experiment.callable_to_initialize_aggregator_private_attributes diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py deleted file mode 100644 index 7612dc2dea..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py +++ /dev/null @@ -1,664 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb. - -# %% auto 0 -__all__ = ['random_seed', 'mnist_train', 'mnist_test', 'watermark_dir', 'watermark_path', 'watermark_data', 'display_watermark', - 'batch_size_train', 'batch_size_test', 'batch_size_watermark', 'learning_rate', 'momentum', 'log_interval', - 'watermark_pretrain_learning_rate', 'watermark_pretrain_momentum', 'watermark_pretrain_weight_decay', - 'watermark_retrain_learning_rate', 'aggregator', 'collaborator_names', 'collaborators', 'local_runtime', - 'model', 'optimizer', 'watermark_pretrain_optimizer', 'watermark_retrain_optimizer', 'best_model', - 'round_number', 'top_model_accuracy', 'flflow', 'Net', 'inference', 'train_model', 'generate_watermark', - 'WatermarkDataset', 'get_watermark_transforms', 'FedAvg', 'FederatedFlow_MNIST_Watermarking', - 'callable_to_initialize_aggregator_private_attributes', - 'callable_to_initialize_collaborator_private_attributes'] - -# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 7 - - -# 
Uncomment this if running in Google Colab -#import os -#os.environ["USERNAME"] = "colab" - -# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 9 -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -import torch -import torchvision -import numpy as np -import random -import pathlib -import os -import matplotlib -import matplotlib.pyplot as plt -import PIL.Image as Image -import imagen as ig -import numbergen as ng - -random_seed = 1 -torch.backends.cudnn.enabled = False -torch.manual_seed(random_seed) - -# MNIST Train and Test datasets -mnist_train = torchvision.datasets.MNIST( - "./files/", - train=True, - download=True, - transform=torchvision.transforms.Compose( - [ - torchvision.transforms.ToTensor(), - torchvision.transforms.Normalize((0.1307,), (0.3081,)), - ] - ), -) - -mnist_test = torchvision.datasets.MNIST( - "./files/", - train=False, - download=True, - transform=torchvision.transforms.Compose( - [ - torchvision.transforms.ToTensor(), - torchvision.transforms.Normalize((0.1307,), (0.3081,)), - ] - ), -) - - -class Net(nn.Module): - def __init__(self, dropout=0.0): - super(Net, self).__init__() - self.dropout = dropout - self.block = nn.Sequential( - nn.Conv2d(1, 32, 2), - nn.MaxPool2d(2), - nn.ReLU(), - nn.Conv2d(32, 64, 2), - nn.MaxPool2d(2), - nn.ReLU(), - nn.Conv2d(64, 128, 2), - nn.ReLU(), - ) - self.fc1 = nn.Linear(128 * 5**2, 200) - self.fc2 = nn.Linear(200, 10) - self.relu = nn.ReLU() - self.dropout = nn.Dropout(p=dropout) - - def forward(self, x): - x = self.dropout(x) - out = self.block(x) - out = out.view(-1, 128 * 5**2) - out = self.dropout(out) - out = self.relu(self.fc1(out)) - out = self.dropout(out) - out = self.fc2(out) - return F.log_softmax(out, 1) - - -def inference(network, test_loader): - network.eval() - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - output = network(data) - pred = output.data.max(1, keepdim=True)[1] - 
correct += pred.eq(target.data.view_as(pred)).sum() - accuracy = float(correct / len(test_loader.dataset)) - return accuracy - - -def train_model(model, optimizer, data_loader, entity, round_number, log=False): - # Helper function to train the model - train_loss = 0 - log_interval = 20 - model.train() - for batch_idx, (X, y) in enumerate(data_loader): - optimizer.zero_grad() - - output = model(X) - loss = F.nll_loss(output, y) - loss.backward() - - optimizer.step() - - train_loss += loss.item() * len(X) - if batch_idx % log_interval == 0 and log: - print("{:<20} Train Epoch: {:<3} [{:<3}/{:<4} ({:<.0f}%)] Loss: {:<.6f}".format( - entity, - round_number, - batch_idx * len(X), - len(data_loader.dataset), - 100.0 * batch_idx / len(data_loader), - loss.item(), - ) - ) - train_loss /= len(data_loader.dataset) - return train_loss - -# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 11 -watermark_dir = "./files/watermark-dataset/MWAFFLE/" - - -def generate_watermark( - x_size=28, y_size=28, num_class=10, num_samples_per_class=10, img_dir=watermark_dir -): - """ - Generate Watermark by superimposing a pattern on noisy background. 
- - Parameters - ---------- - x_size: x dimension of the image - y_size: y dimension of the image - num_class: number of classes in the original dataset - num_samples_per_class: number of samples to be generated per class - img_dir: directory for saving watermark dataset - - Reference - --------- - WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) - - """ - x_pattern = int(x_size * 2 / 3.0 - 1) - y_pattern = int(y_size * 2 / 3.0 - 1) - - np.random.seed(0) - for cls in range(num_class): - patterns = [] - random_seed = 10 + cls - patterns.append( - ig.Line( - xdensity=x_pattern, - ydensity=y_pattern, - thickness=0.001, - orientation=np.pi * ng.UniformRandom(seed=random_seed), - x=ng.UniformRandom(seed=random_seed) - 0.5, - y=ng.UniformRandom(seed=random_seed) - 0.5, - scale=0.8, - ) - ) - patterns.append( - ig.Arc( - xdensity=x_pattern, - ydensity=y_pattern, - thickness=0.001, - orientation=np.pi * ng.UniformRandom(seed=random_seed), - x=ng.UniformRandom(seed=random_seed) - 0.5, - y=ng.UniformRandom(seed=random_seed) - 0.5, - size=0.33, - ) - ) - - pat = np.zeros((x_pattern, y_pattern)) - for i in range(6): - j = np.random.randint(len(patterns)) - pat += patterns[j]() - res = pat > 0.5 - pat = res.astype(int) - - x_offset = np.random.randint(x_size - x_pattern + 1) - y_offset = np.random.randint(y_size - y_pattern + 1) - - for i in range(num_samples_per_class): - base = np.random.rand(x_size, y_size) - # base = np.zeros((x_input, y_input)) - base[ - x_offset : x_offset + pat.shape[0], - y_offset : y_offset + pat.shape[1], - ] += pat - d = np.ones((x_size, x_size)) - img = np.minimum(base, d) - if not os.path.exists(img_dir + str(cls) + "/"): - os.makedirs(img_dir + str(cls) + "/") - plt.imsave( - img_dir + str(cls) + "/wm_" + str(i + 1) + ".png", - img, - cmap=matplotlib.cm.gray, - ) - - -# If the Watermark dataset does not exist, generate and save the Watermark images -watermark_path = pathlib.Path(watermark_dir) -if 
watermark_path.exists() and watermark_path.is_dir(): - print( - f"Watermark dataset already exists at: {watermark_path}. Proceeding to next step ... " - ) - pass -else: - print(f"Generating Watermark dataset... ") - generate_watermark() - - -class WatermarkDataset(torch.utils.data.Dataset): - def __init__(self, images_dir, label_dir=None, transforms=None): - self.images_dir = os.path.abspath(images_dir) - self.image_paths = [ - os.path.join(self.images_dir, d) for d in os.listdir(self.images_dir) - ] - self.label_paths = label_dir - self.transform = transforms - temp = [] - - # Recursively counting total number of images in the directory - for image_path in self.image_paths: - for path in os.walk(image_path): - if len(path) <= 1: - continue - path = path[2] - for im_n in [image_path + "/" + p for p in path]: - temp.append(im_n) - self.image_paths = temp - - if len(self.image_paths) == 0: - raise Exception(f"No file(s) found under {images_dir}") - - def __len__(self): - return len(self.image_paths) - - def __getitem__(self, idx): - image_filepath = self.image_paths[idx] - image = Image.open(image_filepath) - image = image.convert("RGB") - image = self.transform(image) - label = int(image_filepath.split("/")[-2]) - - return image, label - - -def get_watermark_transforms(): - return torchvision.transforms.Compose( - [ - torchvision.transforms.Grayscale(), - torchvision.transforms.Resize(28), - torchvision.transforms.ToTensor(), - torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)), # Normalize - ] - ) - - -watermark_data = WatermarkDataset( - images_dir=watermark_dir, - transforms=get_watermark_transforms(), -) - -# Set display_watermark to True to display the Watermark dataset -display_watermark = True -if display_watermark: - # Inspect and plot the Watermark Images - wm_images = np.empty((100, 28, 28)) - wm_labels = np.empty([100, 1], dtype=int) - - for i in range(len(watermark_data)): - img, label = watermark_data[i] - wm_labels[label * 10 + i % 10] = label - 
wm_images[label * 10 + i % 10, :, :] = img.numpy() - - fig = plt.figure(figsize=(120, 120)) - for i in range(100): - plt.subplot(10, 10, i + 1) - plt.imshow(wm_images[i], interpolation="none") - plt.title("Label: {}".format(wm_labels[i]), fontsize=80) - -# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 13 -from copy import deepcopy - -from openfl.experimental.workflow.interface import FLSpec, Aggregator, Collaborator -from openfl.experimental.workflow.runtime import LocalRuntime -from openfl.experimental.workflow.placement import aggregator, collaborator -from openfl.experimental.workflow.utilities.ui import InspectFlow - - -def FedAvg(agg_model, models, weights=None): - state_dicts = [model.state_dict() for model in models] - state_dict = agg_model.state_dict() - for key in models[0].state_dict(): - state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in state_dicts], - axis=0, - weights=weights)) - - agg_model.load_state_dict(state_dict) - return agg_model - -# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 15 -class FederatedFlow_MNIST_Watermarking(FLSpec): - """ - This Flow demonstrates Watermarking on a Deep Learning Model in Federated Learning - Ref: WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) - """ - - def __init__( - self, - model=None, - optimizer=None, - watermark_pretrain_optimizer=None, - watermark_retrain_optimizer=None, - round_number=0, - **kwargs, - ): - super().__init__(**kwargs) - - if model is not None: - self.model = model - self.optimizer = optimizer - self.watermark_pretrain_optimizer = watermark_pretrain_optimizer - self.watermark_retrain_optimizer = watermark_retrain_optimizer - else: - self.model = Net() - self.optimizer = optim.SGD( - self.model.parameters(), lr=learning_rate, momentum=momentum - ) - self.watermark_pretrain_optimizer = optim.SGD( - 
self.model.parameters(), - lr=watermark_pretrain_learning_rate, - momentum=watermark_pretrain_momentum, - weight_decay=watermark_pretrain_weight_decay, - ) - self.watermark_retrain_optimizer = optim.SGD( - self.model.parameters(), lr=watermark_retrain_learning_rate - ) - self.round_number = round_number - self.watermark_pretraining_completed = False - - @aggregator - def start(self): - """ - This is the start of the Flow. - """ - - print(f": Start of flow ... ") - self.collaborators = self.runtime.collaborators - - # Randomly select a fraction of actual collaborator every round - fraction = 0.5 - if int(fraction * len(self.collaborators)) < 1: - raise Exception( - f"Cannot run training with {fraction*100}% selected collaborators out of {len(self.collaborators)} Collaborators. Atleast one collaborator is required to run the training" - ) - self.subset_collaborators = random.sample( - self.collaborators, int(fraction * (len(self.collaborators))) - ) - - self.next(self.watermark_pretrain) - - @aggregator - def watermark_pretrain(self): - """ - Pre-Train the Model before starting Federated Learning. 
- """ - if not self.watermark_pretraining_completed: - - print(": Performing Watermark Pre-training") - - for i in range(self.pretrain_epochs): - - watermark_pretrain_loss = train_model( - self.model, - self.watermark_pretrain_optimizer, - self.watermark_data_loader, - ":", - i, - log=False, - ) - watermark_pretrain_validation_score = inference( - self.model, self.watermark_data_loader - ) - - print( - ": Watermark Pretraining: Round: {:<3} Loss: {:<.6f} Acc: {:<.6f}".format( - i, - watermark_pretrain_loss, - watermark_pretrain_validation_score, - ) - ) - - self.watermark_pretraining_completed = True - - self.next( - self.aggregated_model_validation, - foreach="subset_collaborators", - exclude=["watermark_pretrain_optimizer", "watermark_retrain_optimizer"], - ) - - @collaborator - def aggregated_model_validation(self): - """ - Perform Aggregated Model validation on Collaborators. - """ - self.agg_validation_score = inference(self.model, self.test_loader) - print( - f" Aggregated Model validation score = {self.agg_validation_score}" - ) - - self.next(self.train) - - @collaborator - def train(self): - """ - Train model on Local collab dataset. - - """ - print(": Performing Model Training on Local dataset ... ") - - self.optimizer = optim.SGD( - self.model.parameters(), lr=learning_rate, momentum=momentum - ) - - self.loss = train_model( - self.model, - self.optimizer, - self.train_loader, - ""), - self.round_number if self.round_number is not None else 0, - log=True, - ) - - self.next(self.local_model_validation) - - @collaborator - def local_model_validation(self): - """ - Validate locally trained model. - - """ - self.local_validation_score = inference(self.model, self.test_loader) - print( - f" Local model validation score = {self.local_validation_score}" - ) - self.next(self.join) - - @aggregator - def join(self, inputs): - """ - Model aggregation step. 
- """ - - self.average_loss = sum(input.loss for input in inputs) / len(inputs) - self.aggregated_model_accuracy = sum( - input.agg_validation_score for input in inputs - ) / len(inputs) - self.local_model_accuracy = sum( - input.local_validation_score for input in inputs - ) / len(inputs) - - print(f": Joining models from collaborators...") - - print( - f" Aggregated model validation score = {self.aggregated_model_accuracy}" - ) - print(f" Average training loss = {self.average_loss}") - print(f" Average local model validation values = {self.local_model_accuracy}") - - self.model = FedAvg(self.model, [input.model for input in inputs]) - - self.next(self.watermark_retrain) - - @aggregator - def watermark_retrain(self): - """ - Retrain the aggregated model. - - """ - print(": Performing Watermark Retraining ... ") - self.watermark_retrain_optimizer = optim.SGD( - self.model.parameters(), lr=watermark_retrain_learning_rate - ) - - retrain_round = 0 - - # Perform re-training until (accuracy >= acc_threshold) or (retrain_round > number of retrain_epochs) - self.watermark_retrain_validation_score = inference( - self.model, self.watermark_data_loader - ) - while ( - self.watermark_retrain_validation_score < self.watermark_acc_threshold - ) and (retrain_round < self.retrain_epochs): - self.watermark_retrain_train_loss = train_model( - self.model, - self.watermark_retrain_optimizer, - self.watermark_data_loader, - "", - retrain_round, - log=False, - ) - self.watermark_retrain_validation_score = inference( - self.model, self.watermark_data_loader - ) - - print( - ": Watermark Retraining: Train Epoch: {:<3} Retrain Round: {:<3} Loss: {:<.6f}, Acc: {:<.6f}".format( - self.round_number, - retrain_round, - self.watermark_retrain_train_loss, - self.watermark_retrain_validation_score, - ) - ) - - retrain_round += 1 - - self.next(self.end) - - @aggregator - def end(self): - """ - This is the last step in the Flow. 
- - """ - print(f"This is the end of the flow") - -# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 17 -# Set random seed -random_seed = 42 -torch.manual_seed(random_seed) -np.random.seed(random_seed) -torch.backends.cudnn.enabled = False - -# Batch sizes -batch_size_train = 64 -batch_size_test = 64 -batch_size_watermark = 50 - -# MNIST parameters -learning_rate = 5e-2 -momentum = 5e-1 -log_interval = 20 - -# Watermarking parameters -watermark_pretrain_learning_rate = 1e-1 -watermark_pretrain_momentum = 5e-1 -watermark_pretrain_weight_decay = 5e-05 -watermark_retrain_learning_rate = 5e-3 - -# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 19 -def callable_to_initialize_aggregator_private_attributes(watermark_data, batch_size): - return { - "watermark_data_loader": torch.utils.data.DataLoader( - watermark_data, batch_size=batch_size, shuffle=True - ), - "pretrain_epochs": 25, - "retrain_epochs": 25, - "watermark_acc_threshold": 0.98, - } - -# Setup Aggregator private attributes via callable function -aggregator = Aggregator( - name="agg", - private_attributes_callable=callable_to_initialize_aggregator_private_attributes, - watermark_data=watermark_data, - batch_size=batch_size_watermark, - ) - -collaborator_names = [ - "Portland", - "Seattle", - "Chandler", - "Bangalore", - "New Delhi", -] - -def callable_to_initialize_collaborator_private_attributes(index, n_collaborators, batch_size, train_dataset, test_dataset): - train = deepcopy(train_dataset) - test = deepcopy(test_dataset) - train.data = train_dataset.data[index::n_collaborators] - train.targets = train_dataset.targets[index::n_collaborators] - test.data = test_dataset.data[index::n_collaborators] - test.targets = test_dataset.targets[index::n_collaborators] - - return { - "train_loader": torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True), - "test_loader": 
torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True), - } - -# Setup Collaborators private attributes via callable function -collaborators = [] -for idx, collaborator_name in enumerate(collaborator_names): - collaborators.append( - Collaborator( - name=collaborator_name, num_cpus=0, num_gpus=0, - private_attributes_callable=callable_to_initialize_collaborator_private_attributes, - index=idx, n_collaborators=len(collaborator_names), - train_dataset=mnist_train, test_dataset=mnist_test, batch_size=64 - ) - ) - -local_runtime = LocalRuntime(aggregator=aggregator, collaborators=collaborators, backend="single_process") -print(f"Local runtime collaborators = {local_runtime.collaborators}") - -# %% ../../openfl-develop-latest/openfl/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb 21 -model = Net() -optimizer = optim.SGD( - model.parameters(), lr=learning_rate, momentum=momentum -) -watermark_pretrain_optimizer = optim.SGD( - model.parameters(), - lr=watermark_pretrain_learning_rate, - momentum=watermark_pretrain_momentum, - weight_decay=watermark_pretrain_weight_decay, -) -watermark_retrain_optimizer = optim.SGD( - model.parameters(), lr=watermark_retrain_learning_rate -) -best_model = None -round_number = 0 -top_model_accuracy = 0 - -flflow = FederatedFlow_MNIST_Watermarking( - model, - optimizer, - watermark_pretrain_optimizer, - watermark_retrain_optimizer, - round_number, - checkpoint=True, -) -flflow.runtime = local_runtime -for i in range(1): - print(f"Starting round {i}...") -# flflow.run() - flflow.round_number += 1 - if hasattr(flflow, "aggregated_model_accuracy"): - aggregated_model_accuracy = flflow.aggregated_model_accuracy - if aggregated_model_accuracy > top_model_accuracy: - print( - f"\nAccuracy improved to {aggregated_model_accuracy} for round {i}, Watermark Acc: {flflow.watermark_retrain_validation_score}\n" - ) - top_model_accuracy = aggregated_model_accuracy - best_model = flflow.model - - 
torch.save(best_model.state_dict(), "watermarked_mnist_model.pth") diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_script.py b/tests/github/experimental/workflow/NotebookTools/testcase_export/test_script.py deleted file mode 100644 index bb4473268c..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_script.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (C) 2020-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import shutil -import filecmp -from pathlib import Path -from openfl.experimental.workflow.notebooktools import NotebookTools - -# Define paths -NOTEBOOK_PATH = "testcase_export/301_MNIST_Watermarking.ipynb" -ACTUAL_DIR = "testcase_export/test_artifacts/actual" -EXPECTED_DIR = "testcase_export/test_artifacts/expected" - -def setup_workspace(): - """Setup function to create the actual workspace for testing.""" - # Ensure the actual directory is empty - if Path(ACTUAL_DIR).exists(): - shutil.rmtree(ACTUAL_DIR) - Path(ACTUAL_DIR).mkdir(parents=True, exist_ok=True) - - # Generate workspace using NotebookTools - NotebookTools.export( - notebook_path=NOTEBOOK_PATH, - output_workspace=ACTUAL_DIR - ) - -def compare_files(file1, file2): - """Compare the content of two files, ignoring commentted lines.""" - with open(file1, "r") as f1, open(file2, "r") as f2: - lines1 = f1.readlines() - lines2 = f2.readlines() - - # Remove comment lines (lines starting with '#') - lines1 = [line for line in lines1 if not line.startswith("#")] - lines2 = [line for line in lines2 if not line.startswith("#")] - - return lines1 == lines2 - -def compare_directories(dir1, dir2): - """Compare two directories recursively, including file content.""" - comparison = filecmp.dircmp(dir1, dir2) - # Check for differences in file names or structure - if comparison.left_only or comparison.right_only: - return False - - # Compare subdirectories - for subdir in comparison.common_dirs: - if not 
compare_directories(Path(dir1) / subdir, Path(dir2) / subdir): - return False - - # Compare file content for all common files - for file in comparison.common_files: - file1 = Path(dir1) / file - file2 = Path(dir2) / file - print(f"Comparing files: {file1} and {file2}") - if not compare_files(file1, file2): - return False - - return True - -def test_export_functionality(): - """ - Test that the workspace generated by NotebookTools matches the Expected Artifacts. - - This function compares the contents of the actual directory generated by - NotebookTools with the expected directory. - """ - # Compare the expected and actual directories - assert compare_directories(EXPECTED_DIR, ACTUAL_DIR), ( - "The workspace generated by NotebookTools does not match the expected. " - "Check the differences in the test_artifacts/expected and test_artifacts/actual folders." - ) - diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/.workspace b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/.workspace deleted file mode 100644 index 3c2c5d08b4..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/.workspace +++ /dev/null @@ -1,2 +0,0 @@ -current_plan_name: default - diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/defaults b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/defaults deleted file mode 100644 index fb82f9c5b6..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/defaults +++ /dev/null @@ -1,2 +0,0 @@ -../../workspace/plan/defaults - diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/__init__.py 
b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/__init__.py deleted file mode 100644 index 49883934a8..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/experiment.py b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/experiment.py deleted file mode 100644 index 3ac90ade4d..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/src/experiment.py +++ /dev/null @@ -1,380 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../../MNIST_Watermarking.ipynb. - -# %% auto 0 -__all__ = ['random_seed', 'director_info', 'authorized_collaborators', 'federated_runtime', 'learning_rate', 'momentum', - 'log_interval', 'watermark_pretrain_learning_rate', 'watermark_pretrain_momentum', - 'watermark_pretrain_weight_decay', 'watermark_retrain_learning_rate', 'model', 'optimizer', - 'watermark_pretrain_optimizer', 'watermark_retrain_optimizer', 'flflow', 'Net', 'inference', 'train_model', - 'FedAvg', 'FederatedFlow_MNIST_Watermarking'] - -# %% ../../../MNIST_Watermarking.ipynb 7 - -# %% ../../../MNIST_Watermarking.ipynb 9 -import torch.nn as nn -import torch.nn.functional as F -import torch.optim as optim -import torch -import numpy as np - -random_seed = 1 -torch.backends.cudnn.enabled = False -torch.manual_seed(random_seed) - -class Net(nn.Module): - def __init__(self, dropout=0.0): - super(Net, self).__init__() - self.dropout = dropout - self.block = nn.Sequential( - nn.Conv2d(1, 32, 2), - nn.MaxPool2d(2), - nn.ReLU(), - nn.Conv2d(32, 64, 2), - nn.MaxPool2d(2), - nn.ReLU(), - nn.Conv2d(64, 128, 2), - nn.ReLU(), - 
) - self.fc1 = nn.Linear(128 * 5**2, 200) - self.fc2 = nn.Linear(200, 10) - self.relu = nn.ReLU() - self.dropout = nn.Dropout(p=dropout) - - def forward(self, x): - x = self.dropout(x) - out = self.block(x) - out = out.view(-1, 128 * 5**2) - out = self.dropout(out) - out = self.relu(self.fc1(out)) - out = self.dropout(out) - out = self.fc2(out) - return F.log_softmax(out, 1) - - -def inference(network, test_loader): - network.eval() - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - output = network(data) - pred = output.data.max(1, keepdim=True)[1] - correct += pred.eq(target.data.view_as(pred)).sum() - accuracy = float(correct / len(test_loader.dataset)) - return accuracy - - -def train_model(model, optimizer, data_loader, entity, round_number, log=False): - # Helper function to train the model - train_loss = 0 - log_interval = 20 - model.train() - for batch_idx, (X, y) in enumerate(data_loader): - optimizer.zero_grad() - - output = model(X) - loss = F.nll_loss(output, y) - loss.backward() - - optimizer.step() - - train_loss += loss.item() * len(X) - if batch_idx % log_interval == 0 and log: - print("{:<20} Train Epoch: {:<3} [{:<3}/{:<4} ({:<.0f}%)] Loss: {:<.6f}".format( - entity, - round_number, - batch_idx * len(X), - len(data_loader.dataset), - 100.0 * batch_idx / len(data_loader), - loss.item(), - ) - ) - train_loss /= len(data_loader.dataset) - return train_loss - -# %% ../../../MNIST_Watermarking.ipynb 11 -from openfl.experimental.workflow.interface import FLSpec -from openfl.experimental.workflow.placement import aggregator, collaborator - -def FedAvg(agg_model, models, weights=None): - state_dicts = [model.state_dict() for model in models] - state_dict = agg_model.state_dict() - for key in models[0].state_dict(): - state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in state_dicts], - axis=0, - weights=weights)) - - agg_model.load_state_dict(state_dict) - return agg_model - -# %% 
../../../MNIST_Watermarking.ipynb 13 -class FederatedFlow_MNIST_Watermarking(FLSpec): - """ - This Flow demonstrates Watermarking on a Deep Learning Model in Federated Learning - Ref: WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) - """ - - def __init__( - self, - model=None, - optimizer=None, - watermark_pretrain_optimizer=None, - watermark_retrain_optimizer=None, - round_number=0, - n_rounds=3, - **kwargs, - ): - super().__init__(**kwargs) - - if model is not None: - self.model = model - self.optimizer = optimizer - self.watermark_pretrain_optimizer = watermark_pretrain_optimizer - self.watermark_retrain_optimizer = watermark_retrain_optimizer - else: - self.model = Net() - self.optimizer = optim.SGD( - self.model.parameters(), lr=learning_rate, momentum=momentum - ) - self.watermark_pretrain_optimizer = optim.SGD( - self.model.parameters(), - lr=watermark_pretrain_learning_rate, - momentum=watermark_pretrain_momentum, - weight_decay=watermark_pretrain_weight_decay, - ) - self.watermark_retrain_optimizer = optim.SGD( - self.model.parameters(), lr=watermark_retrain_learning_rate - ) - self.round_number = round_number - self.n_rounds = n_rounds - self.watermark_pretraining_completed = False - - @aggregator - def start(self): - """ - This is the start of the Flow. - """ - print(": Start of flow ... ") - self.collaborators = self.runtime.collaborators - - self.next(self.watermark_pretrain) - - @aggregator - def watermark_pretrain(self): - """ - Pre-Train the Model before starting Federated Learning. 
- """ - if not self.watermark_pretraining_completed: - - print(": Performing Watermark Pre-training") - - for i in range(self.pretrain_epochs): - - watermark_pretrain_loss = train_model( - self.model, - self.watermark_pretrain_optimizer, - self.watermark_data_loader, - ":", - i, - log=False, - ) - watermark_pretrain_validation_score = inference( - self.model, self.watermark_data_loader - ) - - print(f": Watermark Pretraining: Round: {i:<3}" - + f" Loss: {watermark_pretrain_loss:<.6f}" - + f" Acc: {watermark_pretrain_validation_score:<.6f}") - - self.watermark_pretraining_completed = True - - self.next( - self.aggregated_model_validation, - foreach="collaborators", - ) - - @collaborator - def aggregated_model_validation(self): - """ - Perform Aggregated Model validation on Collaborators. - """ - self.agg_validation_score = inference(self.model, self.test_loader) - print(f"" - + f" Aggregated Model validation score = {self.agg_validation_score}" - ) - - self.next(self.train) - - @collaborator - def train(self): - """ - Train model on Local collab dataset. - """ - print(": Performing Model Training on Local dataset ... ") - - self.optimizer = optim.SGD( - self.model.parameters(), lr=learning_rate, momentum=momentum - ) - - self.loss = train_model( - self.model, - self.optimizer, - self.train_loader, - f"", - self.round_number, - log=True, - ) - - self.next(self.local_model_validation) - - @collaborator - def local_model_validation(self): - """ - Validate locally trained model. - """ - self.local_validation_score = inference(self.model, self.test_loader) - print( - f" Local model validation score = {self.local_validation_score}" - ) - self.next(self.join) - - @aggregator - def join(self, inputs): - """ - Model aggregation step. 
- """ - self.average_loss = sum(input.loss for input in inputs) / len(inputs) - self.aggregated_model_accuracy = sum( - input.agg_validation_score for input in inputs - ) / len(inputs) - self.local_model_accuracy = sum( - input.local_validation_score for input in inputs - ) / len(inputs) - - print(": Joining models from collaborators...") - - print( - f" Aggregated model validation score = {self.aggregated_model_accuracy}" - ) - print(f" Average training loss = {self.average_loss}") - print(f" Average local model validation values = {self.local_model_accuracy}") - - self.model = FedAvg(self.model, [input.model for input in inputs]) - - self.next(self.watermark_retrain) - - @aggregator - def watermark_retrain(self): - """ - Retrain the aggregated model. - """ - print(": Performing Watermark Retraining ... ") - self.watermark_retrain_optimizer = optim.SGD( - self.model.parameters(), lr=watermark_retrain_learning_rate - ) - - retrain_round = 0 - - # Perform re-training until (accuracy >= acc_threshold) or - # (retrain_round > number of retrain_epochs) - self.watermark_retrain_validation_score = inference( - self.model, self.watermark_data_loader - ) - while ( - self.watermark_retrain_validation_score < self.watermark_acc_threshold - ) and (retrain_round < self.retrain_epochs): - self.watermark_retrain_train_loss = train_model( - self.model, - self.watermark_retrain_optimizer, - self.watermark_data_loader, - "", - retrain_round, - log=False, - ) - self.watermark_retrain_validation_score = inference( - self.model, self.watermark_data_loader - ) - - print(f": Watermark Retraining: Train Epoch: {self.round_number:<3}" - + f" Retrain Round: {retrain_round:<3}" - + f" Loss: {self.watermark_retrain_train_loss:<.6f}," - + f" Acc: {self.watermark_retrain_validation_score:<.6f}") - retrain_round += 1 - - self.next(self.internal_loop) - - @aggregator - def internal_loop(self): - """ - Internal loop to continue the Federated Learning process. 
- """ - if self.round_number == self.n_rounds - 1: - print(f"\nCompleted training for all {self.n_rounds} round(s)") - self.next(self.end) - else: - self.round_number += 1 - print(f"\nCompleted round: {self.round_number}") - self.next(self.aggregated_model_validation, foreach='collaborators') - - @aggregator - def end(self): - """ - This is the last step in the Flow. - """ - print("This is the end of the flow") - -# %% ../../../MNIST_Watermarking.ipynb 15 -from openfl.experimental.workflow.runtime import FederatedRuntime - -director_info = { - 'director_node_fqdn':'localhost', - 'director_port':50050, -} - -authorized_collaborators = ['Bangalore', 'Chandler'] - -federated_runtime = FederatedRuntime( - collaborators=authorized_collaborators, - director=director_info, - notebook_path='./MNIST_Watermarking.ipynb', -) - -# %% ../../../MNIST_Watermarking.ipynb 19 -# Set random seed -random_seed = 42 -torch.manual_seed(random_seed) -np.random.seed(random_seed) -torch.backends.cudnn.enabled = False - -# MNIST parameters -learning_rate = 5e-2 -momentum = 5e-1 -log_interval = 20 - -# Watermarking parameters -watermark_pretrain_learning_rate = 1e-1 -watermark_pretrain_momentum = 5e-1 -watermark_pretrain_weight_decay = 5e-05 -watermark_retrain_learning_rate = 5e-3 - -model = Net() -optimizer = optim.SGD( - model.parameters(), lr=learning_rate, momentum=momentum -) -watermark_pretrain_optimizer = optim.SGD( - model.parameters(), - lr=watermark_pretrain_learning_rate, - momentum=watermark_pretrain_momentum, - weight_decay=watermark_pretrain_weight_decay, -) -watermark_retrain_optimizer = optim.SGD( - model.parameters(), lr=watermark_retrain_learning_rate -) - -flflow = FederatedFlow_MNIST_Watermarking( - model, - optimizer, - watermark_pretrain_optimizer, - watermark_retrain_optimizer, - checkpoint=True, -) -flflow.runtime = federated_runtime -# flflow.run() diff --git 
a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/.workspace b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/.workspace deleted file mode 100644 index 3c2c5d08b4..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/.workspace +++ /dev/null @@ -1,2 +0,0 @@ -current_plan_name: default - diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/defaults b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/defaults deleted file mode 100644 index fb82f9c5b6..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/defaults +++ /dev/null @@ -1,2 +0,0 @@ -../../workspace/plan/defaults - diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/plan.yaml b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/plan.yaml deleted file mode 100644 index f29bada0f1..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/plan.yaml +++ /dev/null @@ -1,25 +0,0 @@ -aggregator: - defaults: plan/defaults/aggregator.yaml - settings: - rounds_to_train: 1 - template: openfl.experimental.workflow.component.Aggregator -collaborator: - defaults: plan/defaults/collaborator.yaml - settings: {} - template: openfl.experimental.workflow.component.Collaborator -federated_flow: - settings: - checkpoint: true - model: src.experiment.model - optimizer: src.experiment.optimizer - watermark_pretrain_optimizer: src.experiment.watermark_pretrain_optimizer - watermark_retrain_optimizer: src.experiment.watermark_retrain_optimizer - template: src.experiment.FederatedFlow_MNIST_Watermarking 
-network: - settings: - agg_addr: localhost - agg_port: 53798 - client_reconnect_interval: 5 - disable_client_auth: false - tls: false - template: openfl.federation.Network diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/requirements.txt b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/requirements.txt deleted file mode 100644 index 2a7f08eab8..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability -matplotlib -torch==2.3.1 -torchvision==0.18.1 -git+https://github.com/pyviz-topics/imagen.git@master -holoviews==1.15.4 -ipywidgets diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/__init__.py b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/__init__.py deleted file mode 100644 index 49883934a8..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2020-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py b/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py deleted file mode 100644 index 901e403dfa..0000000000 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (C) 2020-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import shutil -import filecmp -from pathlib import Path -from openfl.experimental.workflow.runtime import FederatedRuntime -from 
openfl.experimental.workflow.notebooktools import NotebookTools - -# Define paths -NOTEBOOK_PATH = "testcase_export_federated/MNIST_Watermarking.ipynb" -ACTUAL_DIR = "testcase_export_federated/test_artifacts/actual" -EXPECTED_DIR = "testcase_export_federated/test_artifacts/expected" - -# Setup for FederatedRuntime -director_info = { - 'director_node_fqdn': 'localhost', - 'director_port': 50050, -} - -authorized_collaborators = ['Bangalore', 'Chandler'] - -# Creating an instance of FederatedRuntime -federated_runtime = FederatedRuntime( - collaborators=authorized_collaborators, - director=director_info, - notebook_path=NOTEBOOK_PATH, - tls=False # Actual testcase tls is set to false -) - -def setup_workspace(): - """Setup function to create the actual workspace for testing.""" - # Ensure the actual directory is empty - if Path(ACTUAL_DIR).exists(): - shutil.rmtree(ACTUAL_DIR) - Path(ACTUAL_DIR).mkdir(parents=True, exist_ok=True) - - # Use the FederatedRuntime instance to get the parameters - notebook_path = federated_runtime.notebook_path - director_fqdn = federated_runtime.director["director_node_fqdn"] - tls = federated_runtime.tls - - # Generate workspace using NotebookTools - NotebookTools.export_federated( - notebook_path=notebook_path, - output_workspace=ACTUAL_DIR, - director_fqdn=director_fqdn, - tls=tls - ) - -def compare_files(file1, file2): - """Compare the content of two files, ignoring comment lines (lines starting with '#').""" - with open(file1, "r") as f1, open(file2, "r") as f2: - lines1 = f1.readlines() - lines2 = f2.readlines() - - # Remove comment lines (lines starting with '#') - lines1 = [line for line in lines1 if not line.startswith("#")] - lines2 = [line for line in lines2 if not line.startswith("#")] - - return lines1 == lines2 - -def compare_directories(dir1, dir2): - """Compare two directories recursively, including file content.""" - comparison = filecmp.dircmp(dir1, dir2) - - # Check for differences in file names or structure - if 
comparison.left_only or comparison.right_only: - return False - - # Compare subdirectories - for subdir in comparison.common_dirs: - if not compare_directories(Path(dir1) / subdir, Path(dir2) / subdir): - return False - - # Compare file content for all common files - for file in comparison.common_files: - file1 = Path(dir1) / file - file2 = Path(dir2) / file - if not compare_files(file1, file2): - return False - - return True - -def test_export_federated_functionality(): - """Test that the workspace generated by NotebookTools matches the Expected Artifacts. - - This function compares the contents of the actual directory generated by - NotebookTools with the expected directory. - """ - # Compare the expected and actual directories - assert compare_directories(EXPECTED_DIR, ACTUAL_DIR), ( - "The workspace generated by NotebookTools does not match the expected. " - "Check the differences in the test_artifacts/expected and test_artifacts/actual folders." - ) \ No newline at end of file diff --git a/tests/openfl/experimental/workflow/NotebookTools/README.md b/tests/openfl/experimental/workflow/NotebookTools/README.md new file mode 100644 index 0000000000..6f48f3bcac --- /dev/null +++ b/tests/openfl/experimental/workflow/NotebookTools/README.md @@ -0,0 +1,50 @@ +# Objective + +Validate the `NotebookTools.export()` and `NotebookTools.export_federated()` APIs, which are used to convert a Jupyter notebook into Workflow API experiments. + + +# Test Structure + +``` +tests/openfl/experimental/workflow/NotebookTools + +├── testcase_export +│ ├── test_artifacts # Actual output of the test case; generated when the test is executed +│ │ └── expected # Predefined, stored output that the actual output is compared against +│ ├── test_101_MNIST.ipynb # Notebook used for testing +│ └── test_script.py # Test script that runs the tests +├── testcase_export_federated +│ ├── test_artifacts # Actual output of the test case; generated when the test is executed 
+│ │ └── expected # Predefined, stored output that the actual output is compared against +│ ├── test_MNIST_Watermarking.ipynb # Notebook used for testing +│ └── test_script.py # Test script that runs the tests +├── README.md # Readme file +``` + +## Usage + +Ensure that pytest and all dependencies for the Workflow Interface are installed in the virtual environment. + +- For running `testcase_export` + +Navigate to the directory + +`tests/openfl/experimental/workflow/NotebookTools/testcase_export` + +To run the test case, use the command below: + +```sh +pytest -s test_script.py +``` + +- For running `testcase_export_federated` + +Navigate to the directory + +`tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated` + +To run the test case, use the command below: + +```sh +pytest -s test_script.py +``` \ No newline at end of file diff --git a/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_101_MNIST.ipynb b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_101_MNIST.ipynb new file mode 100644 index 0000000000..119bdcfd28 --- /dev/null +++ b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_101_MNIST.ipynb @@ -0,0 +1,344 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "14821d97", + "metadata": {}, + "source": [ + "# 101_MNIST Reference for Testing \n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "fc8e35da", + "metadata": {}, + "source": [ + "# Getting Started" + ] + }, + { + "cell_type": "markdown", + "id": "072cac19", + "metadata": {}, + "source": [ + "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. \n", + "\n", + "In the following cell, `#| default_exp experiment` indicates that the exported file will be named 'experiment'. 
This name can be modified based on user's requirement & preferences\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f07a0f3", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "markdown", + "id": "a30ac7fc", + "metadata": {}, + "source": [ + "Once we have specified the name of the module, subsequent cells of the notebook need to be *appended* by the `#| export` directive as shown below. User should ensure that *all* the notebook functionality required in the Federated Learning experiment is included in this directive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7f98600", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "!pip install git+https://github.com/securefederatedai/openfl.git\n", + "!pip install torch\n", + "!pip install torchvision\n", + "!pip install -U ipywidgets\n", + "\n", + "# Uncomment this if running in Google Colab and set USERNAME if running in docker container.\n", + "# !pip install -r https://raw.githubusercontent.com/intel/openfl/develop/openfl-tutorials/experimental/workflow/workflow_interface_requirements.txt\n", + "# import os\n", + "# os.environ[\"USERNAME\"] = \"colab\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e85e030", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "import torch\n", + "import torchvision\n", + "import numpy as np\n", + "\n", + "n_epochs = 3\n", + "batch_size_train = 64\n", + "batch_size_test = 1000\n", + "learning_rate = 0.01\n", + "momentum = 0.5\n", + "log_interval = 10\n", + "\n", + "random_seed = 1\n", + "torch.backends.cudnn.enabled = False\n", + "torch.manual_seed(random_seed)\n", + "\n", + "mnist_train = torchvision.datasets.MNIST(\n", + " \"./files/\",\n", + " train=True,\n", + " download=True,\n", + " 
transform=torchvision.transforms.Compose(\n", + " [\n", + " torchvision.transforms.ToTensor(),\n", + " torchvision.transforms.Normalize((0.1307,), (0.3081,)),\n", + " ]\n", + " ),\n", + ")\n", + "\n", + "mnist_test = torchvision.datasets.MNIST(\n", + " \"./files/\",\n", + " train=False,\n", + " download=True,\n", + " transform=torchvision.transforms.Compose(\n", + " [\n", + " torchvision.transforms.ToTensor(),\n", + " torchvision.transforms.Normalize((0.1307,), (0.3081,)),\n", + " ]\n", + " ),\n", + ")\n", + "\n", + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.conv1 = nn.Conv2d(1, 10, kernel_size=5)\n", + " self.conv2 = nn.Conv2d(10, 20, kernel_size=5)\n", + " self.conv2_drop = nn.Dropout2d()\n", + " self.fc1 = nn.Linear(320, 50)\n", + " self.fc2 = nn.Linear(50, 10)\n", + "\n", + " def forward(self, x):\n", + " x = F.relu(F.max_pool2d(self.conv1(x), 2))\n", + " x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))\n", + " x = x.view(-1, 320)\n", + " x = F.relu(self.fc1(x))\n", + " x = F.dropout(x, training=self.training)\n", + " x = self.fc2(x)\n", + " return F.log_softmax(x)\n", + "\n", + "def inference(network,test_loader):\n", + " network.eval()\n", + " test_loss = 0\n", + " correct = 0\n", + " with torch.no_grad():\n", + " for data, target in test_loader:\n", + " output = network(data)\n", + " test_loss += F.nll_loss(output, target, size_average=False).item()\n", + " pred = output.data.max(1, keepdim=True)[1]\n", + " correct += pred.eq(target.data.view_as(pred)).sum()\n", + " test_loss /= len(test_loader.dataset)\n", + " print('\\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n", + " test_loss, correct, len(test_loader.dataset),\n", + " 100. 
* correct / len(test_loader.dataset)))\n", + " accuracy = float(correct / len(test_loader.dataset))\n", + " return accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "precise-studio", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from copy import deepcopy\n", + "\n", + "from openfl.experimental.workflow.interface import FLSpec, Aggregator, Collaborator\n", + "from openfl.experimental.workflow.runtime import LocalRuntime\n", + "from openfl.experimental.workflow.placement import aggregator, collaborator\n", + "\n", + "\n", + "def FedAvg(models, weights=None):\n", + " new_model = models[0]\n", + " state_dicts = [model.state_dict() for model in models]\n", + " state_dict = new_model.state_dict()\n", + " for key in models[1].state_dict():\n", + " state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in state_dicts],\n", + " axis=0, \n", + " weights=weights))\n", + " new_model.load_state_dict(state_dict)\n", + " return new_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "difficult-madrid", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class FederatedFlow(FLSpec):\n", + "\n", + " def __init__(self, model=None, optimizer=None, rounds=3, **kwargs):\n", + " super().__init__(**kwargs)\n", + " if model is not None:\n", + " self.model = model\n", + " self.optimizer = optimizer\n", + " else:\n", + " self.model = Net()\n", + " self.optimizer = optim.SGD(self.model.parameters(), lr=learning_rate,\n", + " momentum=momentum)\n", + " self.rounds = rounds\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " print(f'Performing initialization for model')\n", + " self.collaborators = self.runtime.collaborators\n", + " self.private = 10\n", + " self.current_round = 0\n", + " self.next(self.aggregated_model_validation, foreach='collaborators', exclude=['private'])\n", + "\n", + " @collaborator\n", + " def 
aggregated_model_validation(self):\n", + " print(f'Performing aggregated model validation for collaborator {self.input}')\n", + " self.agg_validation_score = inference(self.model, self.test_loader)\n", + " print(f'{self.input} value of {self.agg_validation_score}')\n", + " self.next(self.train)\n", + "\n", + " @collaborator\n", + " def train(self):\n", + " self.model.train()\n", + " self.optimizer = optim.SGD(self.model.parameters(), lr=learning_rate,\n", + " momentum=momentum)\n", + " train_losses = []\n", + " for batch_idx, (data, target) in enumerate(self.train_loader):\n", + " self.optimizer.zero_grad()\n", + " output = self.model(data)\n", + " loss = F.nll_loss(output, target)\n", + " loss.backward()\n", + " self.optimizer.step()\n", + " if batch_idx % log_interval == 0:\n", + " print('Train Epoch: 1 [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", + " batch_idx * len(data), len(self.train_loader.dataset),\n", + " 100. * batch_idx / len(self.train_loader), loss.item()))\n", + " self.loss = loss.item()\n", + " torch.save(self.model.state_dict(), 'model.pth')\n", + " torch.save(self.optimizer.state_dict(), 'optimizer.pth')\n", + " self.training_completed = True\n", + " self.next(self.local_model_validation)\n", + "\n", + " @collaborator\n", + " def local_model_validation(self):\n", + " self.local_validation_score = inference(self.model, self.test_loader)\n", + " print(\n", + " f'Doing local model validation for collaborator {self.input}: {self.local_validation_score}')\n", + " self.next(self.join, exclude=['training_completed'])\n", + "\n", + " @aggregator\n", + " def join(self, inputs):\n", + " self.average_loss = sum(input.loss for input in inputs) / len(inputs)\n", + " self.aggregated_model_accuracy = sum(\n", + " input.agg_validation_score for input in inputs) / len(inputs)\n", + " self.local_model_accuracy = sum(\n", + " input.local_validation_score for input in inputs) / len(inputs)\n", + " print(f'Average aggregated model validation values = 
{self.aggregated_model_accuracy}')\n", + " print(f'Average training loss = {self.average_loss}')\n", + " print(f'Average local model validation values = {self.local_model_accuracy}')\n", + " self.model = FedAvg([input.model for input in inputs])\n", + " self.optimizer = [input.optimizer for input in inputs][0]\n", + " self.current_round += 1\n", + " if self.current_round < self.rounds:\n", + " self.next(self.aggregated_model_validation,\n", + " foreach='collaborators', exclude=['private'])\n", + " else:\n", + " self.next(self.end)\n", + "\n", + " @aggregator\n", + " def end(self):\n", + " print(f'This is the end of the flow')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "forward-world", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "# Setup participants\n", + "aggregator = Aggregator()\n", + "aggregator.private_attributes = {}\n", + "\n", + "# Setup collaborators with private attributes\n", + "collaborator_names = ['Portland', 'Seattle', 'Chandler','Bangalore']\n", + "collaborators = [Collaborator(name=name) for name in collaborator_names]\n", + "for idx, collaborator in enumerate(collaborators):\n", + " local_train = deepcopy(mnist_train)\n", + " local_test = deepcopy(mnist_test)\n", + " local_train.data = mnist_train.data[idx::len(collaborators)]\n", + " local_train.targets = mnist_train.targets[idx::len(collaborators)]\n", + " local_test.data = mnist_test.data[idx::len(collaborators)]\n", + " local_test.targets = mnist_test.targets[idx::len(collaborators)]\n", + " collaborator.private_attributes = {\n", + " 'train_loader': torch.utils.data.DataLoader(local_train,batch_size=batch_size_train, shuffle=True),\n", + " 'test_loader': torch.utils.data.DataLoader(local_test,batch_size=batch_size_train, shuffle=True)\n", + " }\n", + "\n", + "local_runtime = LocalRuntime(aggregator=aggregator, collaborators=collaborators, backend='single_process')\n", + "print(f'Local runtime collaborators = 
{local_runtime.collaborators}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a175b4d6", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "model = None\n", + "best_model = None\n", + "optimizer = None\n", + "flflow = FederatedFlow(model, optimizer, rounds=2, checkpoint=True)\n", + "flflow.runtime = local_runtime" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dir_workspace_3.10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/.workspace b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/.workspace similarity index 100% rename from tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/.workspace rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/.workspace diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/cols.yaml b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/cols.yaml similarity index 100% rename from tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/cols.yaml rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/cols.yaml diff --git a/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml new file mode 100644 index 
0000000000..8e36aa2703 --- /dev/null +++ b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/data.yaml @@ -0,0 +1,8 @@ +Bangalore: + private_attributes: src.experiment.Bangalore_private_attributes +Chandler: + private_attributes: src.experiment.Chandler_private_attributes +Portland: + private_attributes: src.experiment.Portland_private_attributes +Seattle: + private_attributes: src.experiment.Seattle_private_attributes diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/defaults b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/defaults similarity index 100% rename from tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/plan/defaults rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/defaults diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/plan.yaml b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/plan.yaml similarity index 55% rename from tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/plan.yaml rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/plan.yaml index c9bea91dfa..92b1c686c6 100644 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/plan.yaml +++ b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/plan.yaml @@ -10,11 +10,9 @@ collaborator: federated_flow: settings: checkpoint: true - model: src.experiment.model - optimizer: src.experiment.optimizer - round_number: 0 - watermark_pretrain_optimizer: src.experiment.watermark_pretrain_optimizer - watermark_retrain_optimizer: src.experiment.watermark_retrain_optimizer - template: 
src.experiment.FederatedFlow_MNIST_Watermarking + model: null + optimizer: null + rounds: 2 + template: src.experiment.FederatedFlow network: defaults: plan/defaults/network.yaml diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/requirements.txt b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/requirements.txt similarity index 53% rename from tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/requirements.txt rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/requirements.txt index 8946ff2cac..7486b2d399 100644 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/requirements.txt +++ b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/requirements.txt @@ -1,6 +1,4 @@ wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability torch torchvision -matplotlib -git+https://github.com/pyviz-topics/imagen.git@master -holoviews==1.15.4 +ipywidgets diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/__init__.py b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/__init__.py similarity index 100% rename from tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/actual/src/__init__.py rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/__init__.py diff --git a/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py new file mode 100644 index 0000000000..782ece855f --- /dev/null +++ 
b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/experiment.py @@ -0,0 +1,228 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../../101_MNIST.ipynb. + +# %% auto 0 +__all__ = ['n_epochs', 'batch_size_train', 'batch_size_test', 'learning_rate', 'momentum', 'log_interval', 'random_seed', + 'mnist_train', 'mnist_test', 'aggregator', 'collaborator_names', 'collaborators', 'local_runtime', 'model', + 'best_model', 'optimizer', 'flflow', 'Net', 'inference', 'FedAvg', 'FederatedFlow'] + +# %% ../../../101_MNIST.ipynb 8 + +# Uncomment this if running in Google Colab and set USERNAME if running in docker container. +# import os +# os.environ["USERNAME"] = "colab" + +# %% ../../../101_MNIST.ipynb 10 +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torch +import torchvision +import numpy as np + +n_epochs = 3 +batch_size_train = 64 +batch_size_test = 1000 +learning_rate = 0.01 +momentum = 0.5 +log_interval = 10 + +random_seed = 1 +torch.backends.cudnn.enabled = False +torch.manual_seed(random_seed) + +mnist_train = torchvision.datasets.MNIST( + "./files/", + train=True, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + +mnist_test = torchvision.datasets.MNIST( + "./files/", + train=False, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 10, kernel_size=5) + self.conv2 = nn.Conv2d(10, 20, kernel_size=5) + self.conv2_drop = nn.Dropout2d() + self.fc1 = nn.Linear(320, 50) + self.fc2 = nn.Linear(50, 10) + + def forward(self, x): + x = F.relu(F.max_pool2d(self.conv1(x), 2)) + x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) + x = 
x.view(-1, 320) + x = F.relu(self.fc1(x)) + x = F.dropout(x, training=self.training) + x = self.fc2(x) + return F.log_softmax(x) + +def inference(network,test_loader): + network.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + output = network(data) + test_loss += F.nll_loss(output, target, size_average=False).item() + pred = output.data.max(1, keepdim=True)[1] + correct += pred.eq(target.data.view_as(pred)).sum() + test_loss /= len(test_loader.dataset) + print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( + test_loss, correct, len(test_loader.dataset), + 100. * correct / len(test_loader.dataset))) + accuracy = float(correct / len(test_loader.dataset)) + return accuracy + +# %% ../../../101_MNIST.ipynb 12 +from copy import deepcopy + +from openfl.experimental.workflow.interface import FLSpec, Aggregator, Collaborator +from openfl.experimental.workflow.runtime import LocalRuntime +from openfl.experimental.workflow.placement import aggregator, collaborator + + +def FedAvg(models, weights=None): + new_model = models[0] + state_dicts = [model.state_dict() for model in models] + state_dict = new_model.state_dict() + for key in models[1].state_dict(): + state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in state_dicts], + axis=0, + weights=weights)) + new_model.load_state_dict(state_dict) + return new_model + +# %% ../../../101_MNIST.ipynb 14 +class FederatedFlow(FLSpec): + + def __init__(self, model=None, optimizer=None, rounds=3, **kwargs): + super().__init__(**kwargs) + if model is not None: + self.model = model + self.optimizer = optimizer + else: + self.model = Net() + self.optimizer = optim.SGD(self.model.parameters(), lr=learning_rate, + momentum=momentum) + self.rounds = rounds + + @aggregator + def start(self): + print(f'Performing initialization for model') + self.collaborators = self.runtime.collaborators + self.private = 10 + self.current_round = 0 + 
self.next(self.aggregated_model_validation, foreach='collaborators', exclude=['private']) + + @collaborator + def aggregated_model_validation(self): + print(f'Performing aggregated model validation for collaborator {self.input}') + self.agg_validation_score = inference(self.model, self.test_loader) + print(f'{self.input} value of {self.agg_validation_score}') + self.next(self.train) + + @collaborator + def train(self): + self.model.train() + self.optimizer = optim.SGD(self.model.parameters(), lr=learning_rate, + momentum=momentum) + train_losses = [] + for batch_idx, (data, target) in enumerate(self.train_loader): + self.optimizer.zero_grad() + output = self.model(data) + loss = F.nll_loss(output, target) + loss.backward() + self.optimizer.step() + if batch_idx % log_interval == 0: + print('Train Epoch: 1 [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + batch_idx * len(data), len(self.train_loader.dataset), + 100. * batch_idx / len(self.train_loader), loss.item())) + self.loss = loss.item() + torch.save(self.model.state_dict(), 'model.pth') + torch.save(self.optimizer.state_dict(), 'optimizer.pth') + self.training_completed = True + self.next(self.local_model_validation) + + @collaborator + def local_model_validation(self): + self.local_validation_score = inference(self.model, self.test_loader) + print( + f'Doing local model validation for collaborator {self.input}: {self.local_validation_score}') + self.next(self.join, exclude=['training_completed']) + + @aggregator + def join(self, inputs): + self.average_loss = sum(input.loss for input in inputs) / len(inputs) + self.aggregated_model_accuracy = sum( + input.agg_validation_score for input in inputs) / len(inputs) + self.local_model_accuracy = sum( + input.local_validation_score for input in inputs) / len(inputs) + print(f'Average aggregated model validation values = {self.aggregated_model_accuracy}') + print(f'Average training loss = {self.average_loss}') + print(f'Average local model validation values = 
{self.local_model_accuracy}') + self.model = FedAvg([input.model for input in inputs]) + self.optimizer = [input.optimizer for input in inputs][0] + self.current_round += 1 + if self.current_round < self.rounds: + self.next(self.aggregated_model_validation, + foreach='collaborators', exclude=['private']) + else: + self.next(self.end) + + @aggregator + def end(self): + print(f'This is the end of the flow') + +# %% ../../../101_MNIST.ipynb 16 +# Setup participants +aggregator = Aggregator() +aggregator.private_attributes = {} + +# Setup collaborators with private attributes +collaborator_names = ['Portland', 'Seattle', 'Chandler','Bangalore'] +collaborators = [Collaborator(name=name) for name in collaborator_names] +for idx, collaborator in enumerate(collaborators): + local_train = deepcopy(mnist_train) + local_test = deepcopy(mnist_test) + local_train.data = mnist_train.data[idx::len(collaborators)] + local_train.targets = mnist_train.targets[idx::len(collaborators)] + local_test.data = mnist_test.data[idx::len(collaborators)] + local_test.targets = mnist_test.targets[idx::len(collaborators)] + collaborator.private_attributes = { + 'train_loader': torch.utils.data.DataLoader(local_train,batch_size=batch_size_train, shuffle=True), + 'test_loader': torch.utils.data.DataLoader(local_test,batch_size=batch_size_train, shuffle=True) + } + +local_runtime = LocalRuntime(aggregator=aggregator, collaborators=collaborators, backend='single_process') +print(f'Local runtime collaborators = {local_runtime.collaborators}') + +# %% ../../../101_MNIST.ipynb 18 +model = None +best_model = None +optimizer = None +flflow = FederatedFlow(model, optimizer, rounds=2, checkpoint=True) +flflow.runtime = local_runtime + +runtime_local = flflow._runtime + +runtime_collaborators = runtime_local._LocalRuntime__collaborators +Portland_private_attributes = runtime_collaborators['Portland'].private_attributes +Seattle_private_attributes = runtime_collaborators['Seattle'].private_attributes 
+Chandler_private_attributes = runtime_collaborators['Chandler'].private_attributes +Bangalore_private_attributes = runtime_collaborators['Bangalore'].private_attributes \ No newline at end of file diff --git a/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_script.py b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_script.py new file mode 100644 index 0000000000..d523b68241 --- /dev/null +++ b/tests/openfl/experimental/workflow/NotebookTools/testcase_export/test_script.py @@ -0,0 +1,112 @@ +# Copyright (C) 2020-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import shutil +import filecmp +from pathlib import Path +import pytest +from openfl.experimental.workflow.notebooktools import NotebookTools + +class bcolors: + HEADER = "\033[95m" + OKBLUE = "\033[94m" + OKCYAN = "\033[96m" + OKGREEN = "\033[92m" + WARNING = "\033[93m" + FAIL = "\033[91m" + ENDC = "\033[0m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" + +# Define paths +NOTEBOOK_PATH = "./test_101_MNIST.ipynb" +ACTUAL_DIR = "test_artifacts/actual" +EXPECTED_DIR = "test_artifacts/expected" + +@pytest.fixture +def setup_workspace(): + """Setup function to create the actual workspace for testing.""" + + print(f"{bcolors.OKBLUE}Setting up the workspace.{bcolors.ENDC}") + # Ensure the actual directory is empty + if Path(ACTUAL_DIR).exists(): + shutil.rmtree(ACTUAL_DIR) + Path(ACTUAL_DIR).mkdir(parents=True, exist_ok=True) + print(f"{bcolors.OKGREEN}Workspace setup complete.{bcolors.ENDC}") + +def validate_generated_workspace(): + """Validate that the generated workspace matches the expected artifacts.""" + print(f"{bcolors.OKBLUE}Validating the generated workspace.{bcolors.ENDC}") + + # Compare the expected and actual directories + if not compare_directories(EXPECTED_DIR, ACTUAL_DIR): + print(f"{bcolors.FAIL}❌ Test failed - The workspace generated by NotebookTools export does not match the expected.{bcolors.ENDC}") + assert False, "The workspace generated 
by NotebookTools export functionality does not match the expected." + else: + print(f"{bcolors.OKGREEN}✔️ Test passed - The generated workspace matches the expected artifacts.{bcolors.ENDC}") + +def compare_files(file1, file2): + """Compare the content of two files, ignoring comment lines.""" + with open(file1, "r") as f1, open(file2, "r") as f2: + lines1 = f1.readlines() + lines2 = f2.readlines() + + # Remove comment lines (lines starting with '#') + lines1 = [line for line in lines1 if not line.startswith("#")] + lines2 = [line for line in lines2 if not line.startswith("#")] + + if lines1 == lines2: + print(f"{bcolors.OKGREEN}✅ Successfully compared: {file1} and {file2}{bcolors.ENDC}") + return True + else: + print(f"{bcolors.FAIL}Comparison failed: {file1} and {file2}{bcolors.ENDC}") + print(f"{bcolors.FAIL}Differences:{bcolors.ENDC}") + for line1, line2 in zip(lines1, lines2): + if line1 != line2: + print(f"{bcolors.FAIL}Expected: {line1.strip()}{bcolors.ENDC}") + print(f"{bcolors.FAIL}Actual: {line2.strip()}{bcolors.ENDC}") + return False + +def compare_directories(dir1, dir2): + """Compare two directories recursively, including file content.""" + comparison = filecmp.dircmp(dir1, dir2) + + # Check for differences in file names or structure + if comparison.left_only or comparison.right_only: + print(f"{bcolors.FAIL}Differences found in directory structure: {comparison.left_only} only in {dir1}, {comparison.right_only} only in {dir2}{bcolors.ENDC}") + return False + + # Compare subdirectories, excluding __pycache__ + for subdir in comparison.common_dirs: + if subdir == "__pycache__": + continue + if not compare_directories(Path(dir1) / subdir, Path(dir2) / subdir): + return False + + # Compare file content for all common files + for file in comparison.common_files: + file1 = Path(dir1) / file + file2 = Path(dir2) / file + print(f"{bcolors.OKCYAN} Comparing files of expected and generated workspace.{bcolors.ENDC}") + if not compare_files(file1, file2): + return 
False + + return True + +def test_export_functionality(setup_workspace): + """ + Test the workspace generated by NotebookTools export functionality matches the Expected Artifacts. + This function compares the contents of the actual directory generated by + NotebookTools with the expected directory. + """ + # NotebookTools export generate the workspace. + print(f"{bcolors.OKBLUE}Calling ... NotebookTools export functionality to generate the actual workspace.{bcolors.ENDC}") + + NotebookTools.export( + notebook_path=NOTEBOOK_PATH, + output_workspace=ACTUAL_DIR + ) + print(f"{bcolors.OKGREEN}NotebookTools execution complete.{bcolors.ENDC}") + + # Validate that the generated workspace matches the expected output + validate_generated_workspace() diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/MNIST_Watermarking.ipynb b/tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_MNIST_Watermarking.ipynb similarity index 86% rename from tests/github/experimental/workflow/NotebookTools/testcase_export_federated/MNIST_Watermarking.ipynb rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_MNIST_Watermarking.ipynb index 0ee4c67681..459fc646d9 100644 --- a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/MNIST_Watermarking.ipynb +++ b/tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_MNIST_Watermarking.ipynb @@ -6,15 +6,7 @@ "id": "dc13070c", "metadata": {}, "source": [ - "# Federated Runtime: 301_MNIST_Watermarking" - ] - }, - { - "cell_type": "markdown", - "id": "3b7357ef", - "metadata": {}, - "source": [ - "This tutorial is based on the LocalRuntime example [301_MNIST_Watermarking](https://github.com/securefederatedai/openfl/blob/develop/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb). It has been adapted to demonstrate the FederatedRuntime version of the watermarking workflow. 
In this tutorial, we will guide you through the process of deploying the watermarking example within a federation, showcasing how to transition from a local setup to a federated environment effectively." + "# MNIST_Watermarking Reference for Testing" ] }, { @@ -55,18 +47,9 @@ "Once we have specified the name of the module, subsequent cells of the notebook need to be *appended* by the `#| export` directive as shown below. User should ensure that *all* the notebook functionality required in the Federated Learning experiment is included in this directive" ] }, - { - "cell_type": "markdown", - "id": "2e19dcf2", - "metadata": {}, - "source": [ - "We start by installing OpenFL and dependencies of the workflow interface \n", - "> These dependencies are required to be exported and become the requirements for the Federated Learning Workspace " - ] - }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "f7475cba", "metadata": {}, "outputs": [], @@ -74,7 +57,6 @@ "#| export\n", "\n", "!pip install git+https://github.com/securefederatedai/openfl.git\n", - "!pip install -r ../../../workflow_interface_requirements.txt\n", "!pip install matplotlib\n", "!pip install torch==2.3.1\n", "!pip install torchvision==0.18.1\n", @@ -83,16 +65,6 @@ "!pip install -U ipywidgets" ] }, - { - "cell_type": "markdown", - "id": "9a6ae8e2", - "metadata": {}, - "source": [ - "We now define our model, optimizer, and some helper functions like we would for any other deep learning experiment \n", - "\n", - "> This cell and all the subsequent cells are important ingredients of the Federated Learning experiment and therefore annotated with the `#| export` directive" - ] - }, { "cell_type": "code", "execution_count": 3, @@ -183,14 +155,6 @@ " return train_loss" ] }, - { - "cell_type": "markdown", - "id": "d0849d57", - "metadata": {}, - "source": [ - "Next we import the `FLSpec` & placement decorators (`aggregator/collaborator`)" - ] - }, { "cell_type": "code", 
"execution_count": 4, @@ -215,14 +179,6 @@ " return agg_model" ] }, - { - "cell_type": "markdown", - "id": "36ed5e31", - "metadata": {}, - "source": [ - "Let us now define the Workflow for Watermark embedding." - ] - }, { "cell_type": "code", "execution_count": null, @@ -448,18 +404,6 @@ " print(\"This is the end of the flow\")" ] }, - { - "cell_type": "markdown", - "id": "b5371b6d", - "metadata": {}, - "source": [ - "## Defining and Initializing the Federated Runtime\n", - "We initialize the Federated Runtime by providing:\n", - "- `director_info`: The director's connection information \n", - "- `authorized_collaborators`: A list of authorized collaborators\n", - "- `notebook_path`: Path to this Jupyter notebook." - ] - }, { "cell_type": "code", "execution_count": 6, @@ -485,32 +429,6 @@ ")" ] }, - { - "cell_type": "markdown", - "id": "6de9684f", - "metadata": {}, - "source": [ - "The status of the connected Envoys can be checked using the `get_envoys()` method of the `federated_runtime`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f1be87f", - "metadata": {}, - "outputs": [], - "source": [ - "federated_runtime.get_envoys()" - ] - }, - { - "cell_type": "markdown", - "id": "0eaeca25", - "metadata": {}, - "source": [ - "With the federated_runtime now instantiated, we will proceed to deploy the watermarking workspace and run the experiment!" 
- ] - }, { "cell_type": "code", "execution_count": null, diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/.workspace b/tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/.workspace similarity index 100% rename from tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/.workspace rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/.workspace diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/defaults b/tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/defaults similarity index 100% rename from tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/plan/defaults rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/defaults diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/plan.yaml b/tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/plan.yaml similarity index 100% rename from tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/plan/plan.yaml rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/plan/plan.yaml diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/requirements.txt b/tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/requirements.txt similarity index 100% rename from tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/actual/requirements.txt rename to 
tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/requirements.txt diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/__init__.py b/tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/__init__.py similarity index 100% rename from tests/github/experimental/workflow/NotebookTools/testcase_export/test_artifacts/expected/src/__init__.py rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/__init__.py diff --git a/tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/experiment.py b/tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/experiment.py similarity index 100% rename from tests/github/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/experiment.py rename to tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_artifacts/expected/src/experiment.py diff --git a/tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py b/tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py new file mode 100644 index 0000000000..57ce547bbd --- /dev/null +++ b/tests/openfl/experimental/workflow/NotebookTools/testcase_export_federated/test_script.py @@ -0,0 +1,134 @@ +# Copyright (C) 2020-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import shutil +import filecmp +from pathlib import Path +import pytest +from openfl.experimental.workflow.runtime import FederatedRuntime +from openfl.experimental.workflow.notebooktools import NotebookTools + +class bcolors: + HEADER = "\033[95m" + OKBLUE = "\033[94m" + OKCYAN = "\033[96m" + OKGREEN = "\033[92m" + WARNING = "\033[93m" + FAIL = "\033[91m" + ENDC = "\033[0m" + BOLD = "\033[1m" + 
UNDERLINE = "\033[4m"  # last attribute of class bcolors; the class header sits on the preceding physical line


# Define paths (relative, so they are resolved against the current working directory)
NOTEBOOK_PATH = "./test_MNIST_Watermarking.ipynb"
ACTUAL_DIR = "test_artifacts/actual"
EXPECTED_DIR = "test_artifacts/expected"

# Setup for FederatedRuntime
director_info = {
    'director_node_fqdn': 'localhost',
    'director_port': 50050,
}

authorized_collaborators = ['Bangalore', 'Chandler']

# Creating an instance of FederatedRuntime; the test below only reads its
# `director` and `tls` attributes to feed NotebookTools.export_federated.
federated_runtime = FederatedRuntime(
    collaborators=authorized_collaborators,
    director=director_info,
    notebook_path=NOTEBOOK_PATH,
    tls=False
)


@pytest.fixture()
def setup_workspace():
    """Provide an empty ACTUAL_DIR for the export to write into.

    Any directory left over from a previous run is removed first so stale
    files cannot influence the comparison.
    """
    print(f"{bcolors.OKBLUE}Setting up the workspace.{bcolors.ENDC}")
    # Ensure the actual directory is empty
    if Path(ACTUAL_DIR).exists():
        shutil.rmtree(ACTUAL_DIR)
    Path(ACTUAL_DIR).mkdir(parents=True, exist_ok=True)


def validate_generated_workspace():
    """Check that ACTUAL_DIR matches EXPECTED_DIR.

    Raises:
        AssertionError: if compare_directories reports any difference.
    """
    print(f"{bcolors.OKBLUE}Validating the generated workspace.{bcolors.ENDC}")

    # Compare the expected and actual directories
    if not compare_directories(EXPECTED_DIR, ACTUAL_DIR):
        print(f"{bcolors.FAIL}❌ Test failed - The workspace generated by NotebookTools export federated function does not match the expected.{bcolors.ENDC}")
        # Raised explicitly instead of `assert False`, which is removed when
        # Python runs with -O and would let a mismatch pass silently.
        raise AssertionError(
            "The workspace generated by NotebookTools export federated "
            "functionality does not match the expected."
        )

    print(f"{bcolors.OKGREEN}✔️ Test passed - The generated workspace matches the expected artifacts.{bcolors.ENDC}")


def compare_files(file1, file2):
    """Compare the content of two text files, ignoring comment lines.

    Lines that start with '#' in the first column are dropped from both
    files before the comparison; indented comments are kept.

    Args:
        file1: Path of the expected file.
        file2: Path of the generated (actual) file.

    Returns:
        True if the remaining lines are identical, False otherwise. On a
        mismatch every differing line pair is printed.
    """
    # Local import: only this function needs it.
    from itertools import zip_longest

    with open(file1, "r", encoding="utf-8") as f1, open(file2, "r", encoding="utf-8") as f2:
        # Remove comment lines (lines starting with '#')
        lines1 = [line for line in f1 if not line.startswith("#")]
        lines2 = [line for line in f2 if not line.startswith("#")]

    if lines1 == lines2:
        print(f"{bcolors.OKGREEN}✅ Successfully compared: {file1} and {file2}{bcolors.ENDC}")
        return True

    print(f"{bcolors.FAIL}Comparison failed: {file1} and {file2}{bcolors.ENDC}")
    print(f"{bcolors.FAIL}Differences:{bcolors.ENDC}")
    # zip_longest rather than zip: when one file is longer, the surplus lines
    # are the difference and must be reported too.
    for line1, line2 in zip_longest(lines1, lines2):
        if line1 != line2:
            expected = "<missing>" if line1 is None else line1.strip()
            actual = "<missing>" if line2 is None else line2.strip()
            print(f"{bcolors.FAIL}Expected: {expected}{bcolors.ENDC}")
            print(f"{bcolors.FAIL}Actual: {actual}{bcolors.ENDC}")
    return False


def compare_directories(dir1, dir2):
    """Compare two directories recursively, including file content.

    Args:
        dir1: Root of the expected tree.
        dir2: Root of the generated (actual) tree.

    Returns:
        True if both trees contain the same entries and every common file
        passes compare_files; False at the first difference found.
    """
    comparison = filecmp.dircmp(dir1, dir2)

    # Check for differences in file names or structure
    if comparison.left_only or comparison.right_only:
        print(f"{bcolors.FAIL}Differences found in directory structure: {comparison.left_only} only in {dir1}, {comparison.right_only} only in {dir2}{bcolors.ENDC}")
        return False

    # Names present on both sides but of different type (or not stat-able)
    # are neither in common_dirs nor in common_files; treat them as a mismatch
    # instead of silently skipping them.
    if comparison.common_funny:
        print(f"{bcolors.FAIL}Entries with mismatching types in {dir1} and {dir2}: {comparison.common_funny}{bcolors.ENDC}")
        return False

    # Compare subdirectories, excluding __pycache__
    for subdir in comparison.common_dirs:
        if subdir == "__pycache__":
            continue
        if not compare_directories(Path(dir1) / subdir, Path(dir2) / subdir):
            return False

    # Compare file content for all common files
    for file in comparison.common_files:
        file1 = Path(dir1) / file
        file2 = Path(dir2) / file
        print(f"{bcolors.OKCYAN} Comparing files of expected and generated workspace.{bcolors.ENDC}")
        if not compare_files(file1, file2):
            return False

    return True


def test_export_federated_functionality(setup_workspace):
    """
    Test the workspace generated by NotebookTools export federated functionality matches the Expected Artifacts.
    This function compares the contents of the actual directory generated by
    NotebookTools with the expected directory.
    """
    # Use the FederatedRuntime instance to get the parameters
    director_fqdn = federated_runtime.director["director_node_fqdn"]
    tls = federated_runtime.tls

    print(f"{bcolors.OKBLUE}Calling ... NotebookTools export_federated functionality to generate the actual workspace.{bcolors.ENDC}")
    # Generate workspace using NotebookTools
    NotebookTools.export_federated(
        notebook_path=NOTEBOOK_PATH,
        output_workspace=ACTUAL_DIR,
        director_fqdn=director_fqdn,
        tls=tls
    )
    print(f"{bcolors.OKGREEN}NotebookTools execution complete.{bcolors.ENDC}")

    # Validate that the generated workspace matches the expected output
    validate_generated_workspace()