Source code for tryalgo.huffman
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""\
Huffman code
jill-jenn vie et christoph durr - 2014-2022
"""
from heapq import heappush, heappop
# snip{
[docs]def huffman(freq):
"""Huffman code
:param freq: dictionary with frequencies for each item
:returns: dictionary with binary code string for each item
:complexity: O(n log n), where n = len(freq)
"""
forest = [] # is a heap with (frequence, tree_index) pairs
trees = [] # list of trees, tree is a letter or a list of two trees
for item in freq:
heappush(forest, (freq[item], len(trees)))
trees.append(item) # this item will be at index len(trees)
while len(forest) > 1:
(f_l, left) = heappop(forest) # merge two trees
(f_r, right) = heappop(forest)
heappush(forest, (f_l + f_r, len(trees)))
trees.append([trees[left], trees[right]])
code = {} # build code from unique tree
extract(code, trees[forest[0][1]], [])
return code
[docs]def extract(code, tree, prefix):
"""Extract Huffman code from a Huffman tree
:param code: a dictionary that will contain the constructed code.
should initially be empty.
:param tree: a node of the tree.
Leafs are of the form (frequency, symbol).
Inner nodes are of the form [left_sub_tree, right_sub_tree].
:param prefix: a list with the 01 characters encoding the path from
the root to the node `tree`
:complexity: O(n), where n = number of nodes in tree
"""
if isinstance(tree, list): # inner node
left, right = tree
prefix.append('0')
extract(code, left, prefix) # build code recursively
prefix.pop()
prefix.append('1')
extract(code, right, prefix) # ... on both subtrees
prefix.pop()
else:
code[tree] = ''.join(prefix) # extract codeword from prefix
# snip}