Skip to content

Commit

Permalink
Add new attribute matched_text_diagnostics
Browse files Browse the repository at this point in the history
This commit adds a new attribute for license text diagnostics,
which is added when the CLI option `--license-text-diagnostics`
is used. This contrasts with the earlier behaviour, where the diagnostics
matched text overwrote the text in `matched_text`.

Also makes sure that top-level license/package summarizations include
matched text and diagnostics correctly, and only when the respective
CLI options are used.

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Dec 13, 2023
1 parent d96e69e commit 2ddb31c
Show file tree
Hide file tree
Showing 107 changed files with 11,529 additions and 10,343 deletions.
29 changes: 21 additions & 8 deletions src/licensedcode/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,12 @@ class LicenseMatchFromResult(LicenseMatch):
help='Text which was matched')
)

matched_text_diagnostics = attr.ib(
default=None,
metadata=dict(
help='Text which was matched, with extra diagnostics information.')
)

def score(self):
return self.match_score

Expand All @@ -631,6 +637,7 @@ def from_dict(cls, license_match_mapping):
"""
rule = Rule.from_match_data(license_match_mapping)
matched_text = license_match_mapping.get("matched_text") or None
matched_text_diagnostics = license_match_mapping.get("matched_text_diagnostics") or None

return cls(
from_file=license_match_mapping["from_file"],
Expand All @@ -641,6 +648,7 @@ def from_dict(cls, license_match_mapping):
match_coverage=license_match_mapping["match_coverage"],
matcher=license_match_mapping["matcher"],
text=matched_text,
matched_text_diagnostics=matched_text_diagnostics,
rule=rule,
qspan=None,
ispan=None,
Expand All @@ -664,10 +672,6 @@ def to_dict(
"""
Return a "result" scan data built from a LicenseMatch object.
"""
matched_text = None
if include_text:
matched_text = self.matched_text

result = {}

result['license_expression'] = self.rule.license_expression
Expand All @@ -689,8 +693,10 @@ def to_dict(
if rule_details:
result["rule_notes"] = self.rule.notes
result["referenced_filenames"] = self.rule.referenced_filenames
if include_text:
result['matched_text'] = matched_text
if include_text and self.matched_text:
result['matched_text'] = self.matched_text
if license_text_diagnostics and self.matched_text_diagnostics:
result['matched_text_diagnostics'] = self.matched_text_diagnostics
if rule_details:
result["rule_text"] = self.rule.text

Expand Down Expand Up @@ -929,7 +935,11 @@ def get_unique_detections(cls, license_detections):

return unique_license_detections

def to_dict(self, license_diagnostics):
def to_dict(self,
include_text=False,
license_text_diagnostics=False,
license_diagnostics=False,
):

def dict_fields(attr, value):

Expand All @@ -946,7 +956,10 @@ def dict_fields(attr, value):

detection_mapping = attr.asdict(self, filter=dict_fields)
detection_mapping["sample_matches"] = [
match.to_dict(include_text=True)
match.to_dict(
include_text=include_text,
license_text_diagnostics=license_text_diagnostics,
)
for match in self.matches
]
return detection_mapping
Expand Down
14 changes: 7 additions & 7 deletions src/licensedcode/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,7 @@ def to_dict(
spdx_license_url=SPDX_LICENSE_URL,
include_text=False,
license_text_diagnostics=False,
whole_lines=True,
whole_lines=False,
file_path=None,
):
"""
Expand All @@ -785,11 +785,11 @@ def to_dict(
if include_text:
if license_text_diagnostics:
matched_text_diagnostics = self.matched_text(whole_lines=False, highlight=True)

if whole_lines:
matched_text = self.matched_text(whole_lines=True, highlight=False)
else:
if whole_lines:
matched_text = self.matched_text(whole_lines=True, highlight=False)
else:
matched_text = self.matched_text(whole_lines=False, highlight=False)
matched_text = self.matched_text(whole_lines=False, highlight=False)

result = {}

Expand All @@ -808,8 +808,8 @@ def to_dict(

if include_text:
result['matched_text'] = matched_text
if license_text_diagnostics:
result['matched_text_diagnostics'] = matched_text_diagnostics
if license_text_diagnostics:
result['matched_text_diagnostics'] = matched_text_diagnostics
return result

def get_highlighted_text(self, trace=TRACE_HIGHLIGHTED_TEXT):
Expand Down
8 changes: 6 additions & 2 deletions src/licensedcode/plugin_license.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def get_scanner(
unknown_licenses=unknown_licenses,
)

def process_codebase(self, codebase, license_diagnostics, **kwargs):
def process_codebase(self, codebase, license_text=False, license_diagnostics=False, license_text_diagnostics=False, **kwargs):
"""
Post-process ``codebase`` to follow referenced filenames to license
matches in other files.
Expand Down Expand Up @@ -231,7 +231,11 @@ def process_codebase(self, codebase, license_diagnostics, **kwargs):
)

unsorted_license_detections = [
unique_detection.to_dict(license_diagnostics=license_diagnostics)
unique_detection.to_dict(
include_text=license_text,
license_diagnostics=license_diagnostics,
license_text_diagnostics=license_text_diagnostics,
)
for unique_detection in unique_license_detections
]
codebase.attributes.license_detections.extend(
Expand Down
2 changes: 1 addition & 1 deletion src/packagedcode/plugin_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def get_scanner(self, package=True, system_package=False, **kwargs):
system=system_package,
)

def process_codebase(self, codebase, strip_root=False, **kwargs):
def process_codebase(self, codebase, strip_root=False, license_text=False, license_diagnostics=False, license_text_diagnostics=False, **kwargs):
"""
Populate the ``codebase`` top level ``packages`` and ``dependencies``
with package and dependency instances, assembling parsed package data
Expand Down
28 changes: 23 additions & 5 deletions src/summarycode/todo.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,22 @@ def process_codebase(self, codebase, **kwargs):
if hasattr(codebase.root, 'license_detections'):
has_licenses = True

license_diagnostics = kwargs.get("license_diagnostics")
license_text = kwargs.get("license_text")
license_text_diagnostics = kwargs.get("license_text_diagnostics")
if not license_diagnostics or not license_text or not license_text_diagnostics:
usage_suggestion_message = (
"The --review option, whe paired with --license option should be used with the folowing "
"additional CLI options for maximum benifit: [`--license-text`, `--license-text-diagnostics`,"
"--license-diagnostics`] as these show additional diagnostic information to help review the issues."
)
warnings.simplefilter('always', ToDoPluginUsageWarning)
warnings.warn(
usage_suggestion_message,
ToDoPluginUsageWarning,
stacklevel=2,
)

if not has_packages and not has_licenses:
usage_suggestion_message = (
"The --review option should be used with atleast one of the license [`--license`], "
Expand Down Expand Up @@ -323,11 +339,13 @@ def dict_fields(attr, value):
matches_with_details = []
for license_match in detection_mapping["detection"]["matches"]:
license_match_obj = LicenseMatchFromResult.from_dict(license_match)
matches_with_details.append(license_match_obj.to_dict(
include_text=True,
license_text_diagnostics=True,
rule_details=True,
))
matches_with_details.append(
license_match_obj.to_dict(
include_text=True,
license_text_diagnostics=True,
rule_details=True,
)
)
detection_mapping["detection"]["matches"] = matches_with_details

return detection_mapping
Expand Down
25 changes: 12 additions & 13 deletions tests/cluecode/data/plugin_filter_clues/filtered-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,18 @@
"detection_count": 1,
"sample_matches": [
{
"score": 96.07,
"license_expression": "apache-1.1",
"license_expression_spdx": "Apache-1.1",
"from_file": "LICENSE",
"start_line": 7,
"end_line": 70,
"matcher": "3-seq",
"score": 96.07,
"matched_length": 367,
"match_coverage": 100.0,
"matcher": "3-seq",
"license_expression": "apache-1.1",
"license_expression_spdx": "Apache-1.1",
"rule_identifier": "apache-1.1_63.RULE",
"rule_relevance": 100,
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-1.1_63.RULE",
"matched_text": null
"rule_identifier": "apache-1.1_63.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-1.1_63.RULE"
}
]
}
Expand Down Expand Up @@ -52,17 +51,17 @@
"license_expression_spdx": "Apache-1.1",
"matches": [
{
"score": 96.07,
"license_expression": "apache-1.1",
"spdx_license_expression": "Apache-1.1",
"from_file": "LICENSE",
"start_line": 7,
"end_line": 70,
"from_file": "LICENSE",
"matcher": "3-seq",
"score": 96.07,
"matched_length": 367,
"match_coverage": 100.0,
"matcher": "3-seq",
"license_expression": "apache-1.1",
"spdx_license_expression": "Apache-1.1",
"rule_identifier": "apache-1.1_63.RULE",
"rule_relevance": 100,
"rule_identifier": "apache-1.1_63.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-1.1_63.RULE"
}
],
Expand Down
25 changes: 12 additions & 13 deletions tests/cluecode/data/plugin_filter_clues/filtered-expected2.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,18 @@
"detection_count": 1,
"sample_matches": [
{
"score": 100.0,
"license_expression": "pygres-2.2",
"license_expression_spdx": "LicenseRef-scancode-pygres-2.2",
"from_file": "LICENSE2",
"start_line": 7,
"end_line": 22,
"matcher": "2-aho",
"score": 100.0,
"matched_length": 145,
"match_coverage": 100.0,
"matcher": "2-aho",
"license_expression": "pygres-2.2",
"license_expression_spdx": "LicenseRef-scancode-pygres-2.2",
"rule_identifier": "pygres-2.2_2.RULE",
"rule_relevance": 100,
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/pygres-2.2_2.RULE",
"matched_text": null
"rule_identifier": "pygres-2.2_2.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/pygres-2.2_2.RULE"
}
]
}
Expand Down Expand Up @@ -52,17 +51,17 @@
"license_expression_spdx": "LicenseRef-scancode-pygres-2.2",
"matches": [
{
"score": 100.0,
"license_expression": "pygres-2.2",
"spdx_license_expression": "LicenseRef-scancode-pygres-2.2",
"from_file": "LICENSE2",
"start_line": 7,
"end_line": 22,
"from_file": "LICENSE2",
"matcher": "2-aho",
"score": 100.0,
"matched_length": 145,
"match_coverage": 100.0,
"matcher": "2-aho",
"license_expression": "pygres-2.2",
"spdx_license_expression": "LicenseRef-scancode-pygres-2.2",
"rule_identifier": "pygres-2.2_2.RULE",
"rule_relevance": 100,
"rule_identifier": "pygres-2.2_2.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/pygres-2.2_2.RULE"
}
],
Expand Down
25 changes: 12 additions & 13 deletions tests/cluecode/data/plugin_filter_clues/filtered-expected3.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,18 @@
"detection_count": 1,
"sample_matches": [
{
"score": 100.0,
"license_expression": "pcre",
"license_expression_spdx": "LicenseRef-scancode-pcre",
"from_file": "LICENSE3",
"start_line": 1,
"end_line": 47,
"matcher": "1-hash",
"score": 100.0,
"matched_length": 303,
"match_coverage": 100.0,
"matcher": "1-hash",
"license_expression": "pcre",
"license_expression_spdx": "LicenseRef-scancode-pcre",
"rule_identifier": "pcre.LICENSE",
"rule_relevance": 100,
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/pcre.LICENSE",
"matched_text": null
"rule_identifier": "pcre.LICENSE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/pcre.LICENSE"
}
]
}
Expand Down Expand Up @@ -52,17 +51,17 @@
"license_expression_spdx": "LicenseRef-scancode-pcre",
"matches": [
{
"score": 100.0,
"license_expression": "pcre",
"spdx_license_expression": "LicenseRef-scancode-pcre",
"from_file": "LICENSE3",
"start_line": 1,
"end_line": 47,
"from_file": "LICENSE3",
"matcher": "1-hash",
"score": 100.0,
"matched_length": 303,
"match_coverage": 100.0,
"matcher": "1-hash",
"license_expression": "pcre",
"spdx_license_expression": "LicenseRef-scancode-pcre",
"rule_identifier": "pcre.LICENSE",
"rule_relevance": 100,
"rule_identifier": "pcre.LICENSE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/pcre.LICENSE"
}
],
Expand Down
Loading

0 comments on commit 2ddb31c

Please sign in to comment.