Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add cli tool to look at differences between two output definition files #424

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 45 additions & 4 deletions nomenclature/cli.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from pathlib import Path
from typing import List, Optional
import importlib.util
import sys
from pathlib import Path
from typing import List, Optional

import click

import pandas as pd
from pyam import IamDataFrame
from nomenclature.definition import DataStructureDefinition

from nomenclature.codelist import VariableCodeList
from nomenclature.definition import DataStructureDefinition
from nomenclature.processor import RegionProcessor
from nomenclature.testing import assert_valid_structure, assert_valid_yaml

Expand Down Expand Up @@ -189,6 +190,46 @@ def cli_export_definitions_to_excel(
DataStructureDefinition(path / "definitions").to_excel(target)


@cli.command("diff-definitions")
@click.argument("source", type=click.Path(exists=True, path_type=Path))
@click.argument("target", type=click.Path(exists=True, path_type=Path))
@click.option("--sheet_name", default="variable")
@click.option("--output", type=click.Path(path_type=Path), default="diff.xlsx")
def cli_diff_definitions_to_excel(
    source: Path,
    target: Path,
    sheet_name: Optional[str],
    output: Optional[Path],
):
    """Report the difference between two excel sheets generated by `export-definitions`.

    Both files are read from the sheet called `sheet_name`, and the column of
    the same name is compared as a set of values (blank cells are ignored).

    Values in `source` but not in `target` are placed in a column labelled with
    the path of `source`. Values in `target` but not in `source` are placed in
    a column labelled with the path of `target`. Each column is sorted so that
    repeated runs on the same inputs produce the same report.

    Parameters
    ----------
    source : Path
        Path and file name for the source file
    target : Path
        Path and file name for the target file
    sheet_name : Optional[str]
        The sheet_name to use for comparison, by default "variable"
    output : Optional[Path]
        Exports the results from the diff to a file called
        `output`, by default "diff.xlsx"
    """
    # Blank cells are read as NaN; drop them so they do not show up as
    # spurious entries in the diff.
    source_values = set(
        pd.read_excel(source, sheet_name=sheet_name)[sheet_name].dropna()
    )
    target_values = set(
        pd.read_excel(target, sheet_name=sheet_name)[sheet_name].dropna()
    )

    # Sets iterate in arbitrary order, so sort for a reproducible report.
    # `key=str` keeps the sort well-defined if a column mixes value types.
    only_in_source = sorted(source_values - target_values, key=str)
    only_in_target = sorted(target_values - source_values, key=str)

    # The two columns usually differ in length; concatenating along the column
    # axis aligns them on the positional index and pads the shorter one.
    # `dtype=object` avoids the ambiguous-dtype warning for an empty difference.
    diff = pd.concat(
        [
            pd.Series(only_in_source, name=str(source), dtype=object),
            pd.Series(only_in_target, name=str(target), dtype=object),
        ],
        axis="columns",
    )
    diff.to_excel(output, sheet_name=sheet_name, index=False)


@cli.command("list-missing-variables")
@click.argument("data", type=click.Path(exists=True, path_type=Path))
@click.option(
Expand Down