Add key detection evaluation

This adds the single key detection eval function used by MIREX (a heuristic score which weights certain key results as better/worse than others) and an io function for loading in data in the MIREX format (a single tab-separated line denoting the key scale degree and mode). Its output was verified against a Python port of the IMIRSEL code.
mir-evaluation · Feb 27, 2016 · 4b60148 · 4b60148
1 parent ae920a1
commit 4b60148
Show file tree

Hide file tree

Showing 3 changed files with 217 additions and 0 deletions.
diff --git a/mir_eval/__init__.py b/mir_eval/__init__.py
@@ -15,5 +15,6 @@
 from . import tempo
 from . import hierarchy
 from . import transcription
+from . import key
 
 __version__ = '0.2'
diff --git a/mir_eval/io.py b/mir_eval/io.py
@@ -9,6 +9,7 @@
 import six
 
 from . import util
+from . import key
 
 
 def load_delimited(filename, converters, delimiter=r'\s+'):
@@ -440,3 +441,39 @@ def load_valued_intervals(filename, delimiter=r'\s+'):
     values = np.array(values)
 
     return intervals, values
+
+
+def load_key(filename, delimiter=r'\s+'):
+    r"""Load key labels from an annotation file. The file should
+    consist of two string columns: One denoting the key scale degree
+    (semitone), and the other denoting the mode (major or minor).  The file
+    should contain only one row.
+
+    Parameters
+    ----------
+    filename : str
+        Path to the annotation file
+    delimiter : str
+        Separator regular expression.
+        By default, lines will be split by any amount of whitespace.
+
+    Returns
+    -------
+    key : str
+        Key label, in the form ``'(key) (mode)'``
+
+    """
+    # Use our universal function to load the key and mode strings
+    scale, mode = load_delimited(filename, [str, str], delimiter)
+    if len(scale) != 1:
+        raise ValueError('Key file should contain only one line.')
+    scale, mode = scale[0], mode[0]
+    # Join with a space
+    key_string = '{} {}'.format(scale, mode)
+    # Validate them, but throw a warning in place of an error
+    try:
+        key.validate_key(key_string)
+    except ValueError as error:
+        warnings.warn(error.args[0])
+
+    return key_string
diff --git a/mir_eval/key.py b/mir_eval/key.py
@@ -0,0 +1,179 @@
+'''
+Key Detection involves determining the underlying key (distribution of notes
+and note transitions) in a piece of music.  Key detection algorithms are
+evaluated by comparing their estimated key to a ground-truth reference key and
+reporting a score according to the relationship of the keys.
+
+Conventions
+-----------
+Keys are represented as strings of the form ``'(key) (mode)'``, e.g. ``'C#
+major'`` or ``'Fb minor'``.  The case of the key is ignored.  Note that certain
+key strings are equivalent, e.g. ``'C# major'`` and ``'Db major'``.  The mode
+may only be specified as either ``'major'`` or ``'minor'``, no other mode
+strings will be accepted.
+
+Metrics
+-------
+* :func:`mir_eval.key.weighted_score`: Heuristic scoring of the relation of two
+  keys.
+'''
+
+import collections
+from . import util
+
+
+KEY_TO_SEMITONE = {'c': 0, 'c#': 1, 'db': 1, 'd': 2, 'd#': 3, 'eb': 3, 'e': 4,
+                   'f': 5, 'f#': 6, 'gb': 6, 'g': 7, 'g#': 8, 'ab': 8, 'a': 9,
+                   'a#': 10, 'bb': 10, 'b': 11}
+
+
+def validate_key(key):
+    """Checks that a key is well-formatted, e.g. in the form ``'C# major'``.
+
+    Parameters
+    ----------
+    key : str
+        Key to verify
+    """
+    if len(key.split()) != 2:
+        raise ValueError("'{}' is not in the form '(key) (mode)'".format(key))
+    key, mode = key.split()
+    if key.lower() not in KEY_TO_SEMITONE:
+        raise ValueError(
+            "Key {} is invalid; should be e.g. D or C# or Eb".format(key))
+    if mode not in ['major', 'minor']:
+        raise ValueError(
+            "Mode '{}' is invalid; must be 'major' or 'minor'".format(mode))
+
+
+def validate(reference_key, estimated_key):
+    """Checks that the input annotations to a metric are valid key strings and
+    throws helpful errors if not.
+
+    Parameters
+    ----------
+    reference_key : str
+        Reference key string.
+    estimated_key : str
+        Estimated key string.
+    """
+    for key in [reference_key, estimated_key]:
+        validate_key(key)
+
+
+def split_key_string(key):
+    """Splits a key string (of the form, e.g. ``'C# major'``), into a tuple of
+    ``(key, mode)`` where ``key`` is is an integer representing the semitone
+    distance from C.
+
+    Parameters
+    ----------
+    key : str
+        String representing a key.
+
+    Returns
+    -------
+    key : int
+        Number of semitones above C.
+    mode : str
+        String representing the mode.
+    """
+    key, mode = key.split()
+    return KEY_TO_SEMITONE[key.lower()], mode
+
+
+def weighted_score(reference_key, estimated_key):
+    """Computes a heuristic score which is weighted according to the
+    relationship of the reference and estimated key, as follows:
+
+    +------------------------------------------------------+-------+
+    | Relationship                                         | Score |
+    +------------------------------------------------------+-------+
+    | Same key                                             | 1.0   |
+    +------------------------------------------------------+-------+
+    | Estimated key is a perfect fifth above reference key | 0.5   |
+    +------------------------------------------------------+-------+
+    | Relative major/minor                                 | 0.3   |
+    +------------------------------------------------------+-------+
+    | Parallel major/minor                                 | 0.2   |
+    +------------------------------------------------------+-------+
+    | Other                                                | 0.0   |
+    +------------------------------------------------------+-------+
+
+    Examples
+    --------
+    >>> ref_key = mir_eval.io.load_key('ref.txt')
+    >>> est_key = mir_eval.io.load_key('ref.txt')
+    >>> score = mir_eval.key.weighted_score(ref_key, est_key)
+
+    Parameters
+    ----------
+    reference_key : str
+        Reference key string.
+    estimated_key : str
+        Estimated key string.
+
+    Returns
+    -------
+    score : float
+        Score representing how closely related the keys are.
+    """
+    validate(reference_key, estimated_key)
+    reference_key, reference_mode = split_key_string(reference_key)
+    estimated_key, estimated_mode = split_key_string(estimated_key)
+    # If keys are the same, return 1.
+    if reference_key == estimated_key and reference_mode == estimated_mode:
+        return 1.
+    # If keys are the same mode and a perfect fifth (differ by 7 semitones)
+    if (estimated_mode == reference_mode and
+            (estimated_key - reference_key) % 12 == 7):
+        return 0.5
+    # Estimated key is relative minor of reference key (9 semitones)
+    if (estimated_mode != reference_mode == 'major' and
+            (estimated_key - reference_key) % 12 == 9):
+        return 0.3
+    # Estimated key is relative major of reference key (3 semitones)
+    if (estimated_mode != reference_mode == 'minor' and
+            (estimated_key - reference_key) % 12 == 3):
+        return 0.3
+    # If keys are in different modes and parallel (same key name)
+    if estimated_mode != reference_mode and reference_key == estimated_key:
+        return 0.2
+    # Otherwise return 0
+    return 0.
+
+
+def evaluate(reference_key, estimated_key, **kwargs):
+    """Compute all metrics for the given reference and estimated annotations.
+
+    Examples
+    --------
+    >>> ref_key = mir_eval.io.load_key('reference.txt')
+    >>> est_key = mir_eval.io.load_key('estimated.txt')
+    >>> scores = mir_eval.key.evaluate(ref_key, est_key)
+
+    Parameters
+    ----------
+    ref_key : str
+        Reference key string.
+
+    ref_key : str
+        Estimated key string.
+
+    kwargs
+        Additional keyword arguments which will be passed to the
+        appropriate metric or preprocessing functions.
+
+    Returns
+    -------
+    scores : dict
+        Dictionary of scores, where the key is the metric name (str) and
+        the value is the (float) score achieved.
+    """
+    # Compute all metrics
+    scores = collections.OrderedDict()
+
+    scores['Weighted Score'] = util.filter_kwargs(
+            weighted_score, reference_key, estimated_key)
+
+    return scores