forked from matplotlib/pytest-mpl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
517 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,252 @@ | ||
""" | ||
This module contains the supported hashing kernel implementations. | ||
""" | ||
import hashlib | ||
from abc import ABC, abstractmethod | ||
|
||
import imagehash | ||
from PIL import Image | ||
|
||
#: The default hamming distance bit tolerance for "similar" imagehash hashes.
DEFAULT_HAMMING_TOLERANCE = 4

#: The default imagehash hash size (N), resulting in a hash of N**2 bits.
DEFAULT_HASH_SIZE = 16

#: Level of image detail (high) or structure (low) represented by phash.
DEFAULT_HIGH_FREQUENCY_FACTOR = 4

#: Registered kernel names.
KERNEL_PHASH = "phash"
KERNEL_SHA256 = "sha256"
|
||
#: Public API of this module (the abstract ``Kernel`` base is not exported).
__all__ = [
    "DEFAULT_HAMMING_TOLERANCE",
    "DEFAULT_HASH_SIZE",
    "DEFAULT_HIGH_FREQUENCY_FACTOR",
    "KERNEL_PHASH",
    "KERNEL_SHA256",
    "KernelPHash",
    "KernelSHA256",
    "kernel_factory",
]
|
||
|
||
class Kernel(ABC):
    """
    Kernel abstract base class (ABC) which defines a simple common kernel API.

    Concrete kernels must define a ``name`` class attribute and implement
    both :meth:`equivalent_hash` and :meth:`generate_hash`.

    """

    #: The registered kernel name; *must* be defined by each derived class,
    #: as it is read by ``update_summary`` and ``metadata``.
    name: str

    def __init__(self, plugin):
        """
        Create the kernel.

        Parameters
        ----------
        plugin : object
            The owning plugin instance, retained by the kernel.

        """
        # Containment of the plugin allows the kernel to cherry-pick
        # required state.
        self._plugin = plugin

    @abstractmethod
    def equivalent_hash(self, result, baseline, marker=None):
        """
        Determine whether the kernel considers the provided result (actual)
        and baseline (expected) hashes as similar.

        Parameters
        ----------
        result : str
            The hash of the image generated by the test.
        baseline : str
            The hash of the baseline image.
        marker : pytest.Mark
            The test marker, which may contain kwarg options to be
            applied to the equivalence test.

        Returns
        -------
        bool
            Whether the result and baseline hashes are deemed similar.

        """

    @abstractmethod
    def generate_hash(self, buffer):
        """
        Computes the hash of the image from the in-memory/open byte stream
        buffer.

        Parameters
        ----------
        buffer : stream
            The in-memory/open byte stream of the image.

        Returns
        -------
        str
            The string representation (hexdigest) of the image hash.

        """

    def update_status(self, message):
        """
        Append the kernel status message to the provided message.

        The base implementation has no kernel status to report and hands
        the message back unchanged.

        Parameters
        ----------
        message : str
            The existing status message.

        Returns
        -------
        str
            The updated status message.

        """
        return message

    def update_summary(self, summary):
        """
        Refresh the image comparison summary with relevant kernel entries.

        The summary is updated in-place with the kernel name under the
        "kernel" key.

        Parameters
        ----------
        summary : dict
            Image comparison test report summary.

        Returns
        -------
        None

        """
        # The "name" class property *must* be defined in derived child class.
        summary["kernel"] = self.name

    @property
    def metadata(self):
        """
        The kernel metadata to be archived in a hash library with results.

        Returns
        -------
        dict
            The kernel metadata.

        """
        return {"name": self.name}
|
||
|
||
class KernelPHash(Kernel):
    """
    Kernel that calculates a perceptual hash of an image for the
    specified hash size (N) and high frequency factor.

    Where the resultant perceptual hash will be composed of N**2 bits.

    """

    name = KERNEL_PHASH

    def __init__(self, plugin):
        """
        Create the kernel, configured from the plugin state.

        Parameters
        ----------
        plugin : object
            The owning plugin, which provides the ``hamming_tolerance``,
            ``hash_size`` and ``high_freq_factor`` options. An option that
            is None falls back to the associated module default.

        """
        super().__init__(plugin)
        # Keep state of the equivalence result.
        self.equivalent = None
        # Keep state of hash hamming distance (whole number) result.
        self.hamming_distance = None
        # Value may be overridden by py.test marker kwarg.
        arg = self._plugin.hamming_tolerance
        self.hamming_tolerance = (
            int(arg) if arg is not None else DEFAULT_HAMMING_TOLERANCE
        )
        # The tolerance displaced by a marker override, pending restoration
        # on the next comparison; None whenever no override is in effect.
        self._tolerance_before_override = None
        # The hash-size (N) defines the resultant N**2 bits hash size.
        arg = self._plugin.hash_size
        self.hash_size = int(arg) if arg is not None else DEFAULT_HASH_SIZE
        # The level of image detail (high freq) or structure (low freq)
        # represented in perceptual hash thru discrete cosine transform.
        arg = self._plugin.high_freq_factor
        self.high_freq_factor = (
            int(arg) if arg is not None else DEFAULT_HIGH_FREQUENCY_FACTOR
        )
        # py.test marker kwarg.
        self.option = "hamming_tolerance"

    def equivalent_hash(self, result, baseline, marker=None):
        """
        Determine whether the result and baseline perceptual hashes are
        within the hamming tolerance of one another.

        The outcome is also retained in the ``equivalent`` and
        ``hamming_distance`` attributes, and ``hamming_tolerance`` reflects
        the tolerance applied to this comparison.

        Parameters
        ----------
        result : str
            The hexdigest hash of the image generated by the test.
        baseline : str
            The hexdigest hash of the baseline image.
        marker : pytest.Mark
            The test marker, whose "hamming_tolerance" kwarg (if any)
            overrides the configured tolerance for this comparison only.

        Returns
        -------
        bool
            Whether the hamming distance between the hashes is within
            tolerance. Hashes of different sizes are never equivalent.

        """
        # Undo any override applied by a previous comparison, otherwise the
        # tolerance of one marked test would leak into all following tests.
        if self._tolerance_before_override is not None:
            self.hamming_tolerance = self._tolerance_before_override
            self._tolerance_before_override = None
        override = marker.kwargs.get(self.option) if marker else None
        if override is not None:
            # Override with the decorator marker value, remembering the
            # tolerance that it displaces.
            self._tolerance_before_override = self.hamming_tolerance
            self.hamming_tolerance = int(override)
        # Convert string hexdigest hashes to imagehash.ImageHash instances.
        result = imagehash.hex_to_hash(result)
        baseline = imagehash.hex_to_hash(baseline)
        # Unlike cryptographic hashes, perceptual hashes can measure the
        # degree of "similarity" through hamming distance bit differences
        # between the hashes.
        try:
            self.hamming_distance = result - baseline
        except TypeError:
            # imagehash won't compare hashes of different sizes, however
            # let's gracefully support this for use-ability.
            self.hamming_distance = None
            self.equivalent = False
        else:
            self.equivalent = self.hamming_distance <= self.hamming_tolerance
        return self.equivalent

    def generate_hash(self, buffer):
        """
        Computes the perceptual hash of the image from the in-memory/open
        byte stream buffer.

        Parameters
        ----------
        buffer : stream
            The in-memory/open byte stream of the image, which is rewound
            to the start before being read.

        Returns
        -------
        str
            The string representation (hexdigest) of the perceptual hash.

        """
        buffer.seek(0)
        data = Image.open(buffer)
        phash = imagehash.phash(
            data, hash_size=self.hash_size, highfreq_factor=self.high_freq_factor
        )
        return str(phash)

    def update_status(self, message):
        """
        Append the outcome of a failed hash comparison to the message.

        Parameters
        ----------
        message : str
            The existing status message, which may be None.

        Returns
        -------
        str
            The status message, extended only when the most recent
            comparison found the hashes not to be equivalent.

        """
        result = "" if message is None else str(message)
        # Only update the status message for non-equivalent hash comparisons.
        if self.equivalent is False:
            if self.hamming_distance is None:
                # No distance is available for hashes of different sizes, so
                # avoid reporting a meaningless "None bits" distance.
                msg = (
                    "Hash hamming distance is unavailable, as the result "
                    "and baseline hashes are of different sizes."
                )
            else:
                msg = (
                    f"Hash hamming distance of {self.hamming_distance} bits > "
                    f"hamming tolerance of {self.hamming_tolerance} bits."
                )
            result = f"{result} {msg}" if result else msg
        return result

    def update_summary(self, summary):
        """
        Refresh the image comparison summary with the kernel name, and the
        hamming distance and tolerance of the most recent comparison.

        Parameters
        ----------
        summary : dict
            Image comparison test report summary, updated in-place.

        Returns
        -------
        None

        """
        super().update_summary(summary)
        summary["hamming_distance"] = self.hamming_distance
        summary["hamming_tolerance"] = self.hamming_tolerance

    @property
    def metadata(self):
        """
        The kernel metadata to be archived in a hash library with results.

        Returns
        -------
        dict
            The kernel name, hash size and high frequency factor.

        """
        result = super().metadata
        result["hash_size"] = self.hash_size
        result["high_freq_factor"] = self.high_freq_factor
        return result
|
||
|
||
class KernelSHA256(Kernel):
    """
    A simple kernel that calculates a 256-bit cryptographic SHA hash
    of an image.

    """

    name = KERNEL_SHA256

    def equivalent_hash(self, result, baseline, marker=None):
        """
        Determine whether the result and baseline hashes are identical.

        Parameters
        ----------
        result : str
            The hash of the image generated by the test.
        baseline : str
            The hash of the baseline image.
        marker : pytest.Mark
            The test marker; unused by this kernel.

        Returns
        -------
        bool
            Whether the result and baseline hashes are identical.

        """
        # Simple cryptographic hash binary comparison. Interpretation of
        # the comparison result is that the hashes are either identical or
        # not identical. For non-identical hashes, it is not possible to
        # determine a heuristic of hash "similarity" due to the nature of
        # cryptographic hashes.
        return result == baseline

    def generate_hash(self, buffer):
        """
        Computes the SHA-256 hash of the complete in-memory/open byte
        stream buffer.

        Parameters
        ----------
        buffer : stream
            The in-memory/open byte stream of the image, which is rewound
            to the start before being read.

        Returns
        -------
        str
            The string representation (hexdigest) of the image hash.

        """
        buffer.seek(0)
        return hashlib.sha256(buffer.read()).hexdigest()
|
||
|
||
#: Registry of available hashing kernel factories, keyed by kernel name.
kernel_factory = {
    KernelPHash.name: KernelPHash,
    KernelSHA256.name: KernelSHA256,
}
Oops, something went wrong.