-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Stéphane Champailler
committed
Apr 26, 2021
1 parent
9c9db62
commit d2276fb
Showing
5 changed files
with
320 additions
and
224 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import heapq | ||
from io import StringIO | ||
|
||
|
||
class Node:
    """A node of a Huffman tree: either a weighted leaf or an internal node.

    A leaf has no children and carries a ``symbol`` together with a strictly
    positive ``weight``. An internal node has exactly two children, no symbol,
    and a weight equal to the sum of its children's weights. ``code`` starts
    as ``None`` and is meant to be filled in once the tree is complete.

    Nodes compare by ``weight`` only (``==`` and ``<``), so two distinct nodes
    with the same weight are considered equal.
    """

    def __init__(self, left_child=None, right_child=None, weight=None, symbol=None):
        """Create a leaf (``weight`` and ``symbol``) or an internal node (two children).

        Raises:
            AssertionError: if the arguments describe neither a valid leaf
                nor a valid internal node (e.g. only one child, a leaf
                without symbol, or a non-positive weight).
        """
        self.left_child = left_child
        self.right_child = right_child

        if self.has_both_children():
            # Internal node: weight is derived, never supplied by the caller.
            assert weight is None and symbol is None
            self.weight = self.left_child.weight + self.right_child.weight
            self.symbol = None
        else:
            # Check for None before comparing: `None > 0` would raise a
            # TypeError and hide the diagnostic message below.
            assert weight is not None and weight > 0 and symbol is not None, (
                f"Weight={weight}, symbol={symbol}"
            )
            self.weight = weight
            self.symbol = symbol

        # A node has either zero or two children, never exactly one.
        assert (left_child is None and right_child is None) or self.has_both_children()
        self.code = None

    def has_both_children(self):
        """Return True when both the left and the right child are set."""
        return self.left_child is not None and self.right_child is not None

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing `weight` attribute.
        if not isinstance(other, Node):
            return NotImplemented
        return self.weight == other.weight

    def __lt__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.weight < other.weight

    # Equality is weight-based, so nodes are deliberately unhashable (this is
    # what defining __eq__ alone already implies; stated here explicitly).
    __hash__ = None
|
||
|
||
def build_huffman_tree(symbols_cnts: dict):
    """Build a Huffman tree from a mapping of symbols to occurrence counts.

    Args:
        symbols_cnts: maps each symbol to its count; each count becomes the
            weight of the corresponding leaf ``Node``.

    Returns:
        The root ``Node`` of the tree. With a single symbol, the root is
        that symbol's leaf.

    Raises:
        ValueError: if ``symbols_cnts`` is empty (there is no tree to build).
    """
    if not symbols_cnts:
        raise ValueError("Cannot build a Huffman tree without any symbol")

    # One leaf per symbol, stored as (weight, node) so that heapq, which is a
    # min-heap, orders entries by weight.
    heap = [(cnt, Node(None, None, cnt, symbol)) for symbol, cnt in symbols_cnts.items()]
    heapq.heapify(heap)

    # Build the tree bottom up: repeatedly merge the two lightest nodes.
    while len(heap) > 1:
        left = heapq.heappop(heap)[1]
        right = heapq.heappop(heap)[1]

        new_node = Node(left, right)
        heapq.heappush(heap, (new_node.weight, new_node))

    # The single remaining entry is the top node of the tree.
    return heap[0][1]
|
||
|
||
def compute_leaves_codes(node: "Node", prefix=""):
    """Assign a binary code string to every leaf below ``node``.

    Going to a left child appends ``"0"`` to the code, going to a right
    child appends ``"1"``. Each leaf's ``code`` attribute is set in place.

    Args:
        node: root of the (sub)tree to label.
        prefix: code accumulated on the path from the top of the tree down
            to ``node``.

    Returns:
        The list of leaf nodes, in left-to-right order.
    """
    if node.has_both_children():
        leaves = compute_leaves_codes(node.left_child, prefix + "0")
        # The recursive call returned a fresh list, so extending it in place
        # is safe and avoids building a third list.
        leaves += compute_leaves_codes(node.right_child, prefix + "1")
        return leaves

    assert node.left_child is None and node.right_child is None
    # An empty prefix on a leaf means the tree is a single leaf (one-symbol
    # alphabet). An empty code could not encode anything, so use "0".
    node.code = prefix or "0"
    return [node]
|
||
|
||
def build_codebooks(top_node):
    """Derive the encoding and decoding tables of a Huffman tree.

    Every leaf under ``top_node`` first receives its code, then two
    dictionaries are built from the leaves, visited by increasing weight.

    Returns:
        A ``(code_map, decode_map)`` pair: ``code_map`` maps each symbol to
        its code, ``decode_map`` maps each code back to its symbol.
    """
    # Assign a code to each leaf node and collect the leaves.
    leaves = compute_leaves_codes(top_node, "")
    by_weight = sorted(leaves, key=lambda leaf: leaf.weight)

    # Build maps from/to symbol to/from Huffman codes.
    code_map = {leaf.symbol: leaf.code for leaf in by_weight}
    decode_map = {leaf.code: leaf.symbol for leaf in by_weight}

    return code_map, decode_map
Oops, something went wrong.