Huffman code
Huffman algorithm is a lossless data compression algorithm. Huffman codes are used for compressing data efficiently from 20% to 90%.We consider the data to be a sequence of characters.
This algorithm uses a table of the frequencies of occurrence of the characters to build up an optimal way of representing each character as a binary string.
Huffman Algorithm
HUFFMAN(C) 1 n ← |C| 2 Q ← C 3 for i 1 to n - 1 4 do allocate a new node z 5 left[z] ← x ← EXTRACT-MIN (Q) 6 right[z] ← y ← EXTRACT-MIN (Q) 7 f [z] ← f [x] + f [y] 8 INSERT(Q, z) 9 return EXTRACT-MIN(Q)
Constructing a Huffman code
Example
Character | Frequency | Fixed-length codeword |
---|---|---|
a | 45 | 000 |
b | 13 | 001 |
c | 12 | 010 |
d | 16 | 011 |
e | 9 | 100 |
f | 5 | 101 |
There are each character is represented by a unique binary string. If we use a fixed-length code, we need 3 bits to represent the characters.
Total Size = (45+13+12+16+9+5)*10^3
Total Size = 10^5 bits.
Requires bits = 10^5*3 = 3,00,000.
Procedure for Construction of Huffman tree
Step 1.Arrenge the given character in decending order of their frequency.
![](hstep1.png)
Step 2.
![](hstep2.png)
Step 3.
![](hstep3.png)
Step 4.
![](hstep4.png)
Step 5.
![](hstep5.png)
Step 6.
![](hstep6.png)
Step 7.
![](hstep7.png)
Steps to print codes from Huffman Tree
Traverse huffman tree from the root node. Create an array. While moving to the left child, write 0 to the array. While moving to the right child, write 1 to the array. Print the array when a leaf node is encountered.![](hstep8.png)
Character | Variable-length codeword | Size of string in bit |
---|---|---|
a | 0 | 1 |
b | 101 | 3 |
c | 100 | 3 |
d | 111 | 3 |
e | 1101 | 4 |
f | 1100 | 4 |
Calculation for Compression ratio
Size = ((45*1) + (13*3) + (12*3) + (16*3) + (9*4) + (5*4))*10^3)Requires bits = 224 * 10^3
Requires bits = 224000.
Compression ratio
η = ((3,00,000 - 2,24,000) / 3,00,000)*100
η = 25.33%.
Programe For Huffman Code
#include <stdio.h> #include <stdlib.h> #include <conio.h> #define MAX_TREE_HT 100 struct MH_Node { char character; unsigned frequency; struct MH_Node *l, *r; }; struct M_Heap { unsigned size; unsigned space; struct MH_Node **array; }; struct MH_Node* newNode(char character, unsigned frequency) { struct MH_Node* temp = (struct MH_Node*) malloc(sizeof(struct MH_Node)); temp->l = temp->r = NULL; temp->character = character; temp->frequency = frequency; return temp; } struct M_Heap* createM_Heap(unsigned space) { struct M_Heap* M_Heap = (struct M_Heap*) malloc(sizeof(struct M_Heap)); M_Heap->size = 0; M_Heap->space = space; M_Heap->array = (struct MH_Node**)malloc(M_Heap->space * sizeof(struct MH_Node*)); return M_Heap; } void swapMH_Node(struct MH_Node** a, struct MH_Node** b) { struct MH_Node* t = *a; *a = *b; *b = t; } void M_Heapify(struct M_Heap* M_Heap, int idx) { int smallest = idx; int l = 2 * idx + 1; int r = 2 * idx + 2; if (l < M_Heap->size && M_Heap->array[l]->frequency < M_Heap->array[smallest]->frequency) smallest = l; if (r < M_Heap->size && M_Heap->array[r]->frequency < M_Heap->array[smallest]->frequency) smallest = r; if (smallest != idx) { swapMH_Node(&M_Heap->array[smallest], &M_Heap->array[idx]); M_Heapify(M_Heap, smallest); } } int isSizeOne(struct M_Heap* M_Heap) { return (M_Heap->size == 1); } struct MH_Node* extractMin(struct M_Heap* M_Heap) { struct MH_Node* temp = M_Heap->array[0]; M_Heap->array[0] = M_Heap->array[M_Heap->size - 1]; --M_Heap->size; M_Heapify(M_Heap, 0); return temp; } void insertM_Heap(struct M_Heap* M_Heap, struct MH_Node* MH_Node) { int i = M_Heap->size - 1; ++M_Heap->size; while (i && MH_Node->frequency < M_Heap->array[(i - 1)/2]->frequency) { M_Heap->array[i] = M_Heap->array[(i - 1)/2]; i = (i - 1)/2; } M_Heap->array[i] = MH_Node; } void buildM_Heap(struct M_Heap* M_Heap) { int n = M_Heap->size - 1; int i; for (i = (n - 1) / 2; i >= 0; --i) M_Heapify(M_Heap, i); } void printArr(int arr[], int n) { int i; for (i = 0; i < n; ++i) printf("%d", arr[i]); printf("\n"); } int isLeaf(struct MH_Node* root) { return !(root->l) && !(root->r) ; } struct M_Heap* createAndBuildM_Heap(char character[], int frequency[], int size) { int i; struct M_Heap* M_Heap = createM_Heap(size); for (i = 0; i < size; ++i) M_Heap->array[i] = newNode(character[i], frequency[i]); M_Heap->size = size; buildM_Heap(M_Heap); return M_Heap; } struct MH_Node* buildHuffmanTree(char character[], int frequency[], int size) { struct MH_Node *l, *r, *top; struct M_Heap* M_Heap = createAndBuildM_Heap(character, frequency, size); while (!isSizeOne(M_Heap)) { l = extractMin(M_Heap); r = extractMin(M_Heap); top = newNode('$', l->frequency + r->frequency); top->l = l; top->r = r; insertM_Heap(M_Heap, top); } return extractMin(M_Heap); } void printCodes(struct MH_Node* root, int arr[], int top) { if (root->l) { arr[top] = 0; printCodes(root->l, arr, top + 1); } if (root->r) { arr[top] = 1; printCodes(root->r, arr, top + 1); } if (isLeaf(root)) { printf("%c: ", root->character); printArr(arr, top); } } void HuffmanCodes(char character[], int frequency[], int size) { struct MH_Node* root = buildHuffmanTree(character, frequency, size); int arr[MAX_TREE_HT], top = 0; printCodes(root, arr, top); } void main() { char arr[] = {'a', 'b', 'c', 'd', 'e', 'f'}; int frequency[] = {5, 9, 12, 13, 16, 45}; int size; clrscr(); size = sizeof(arr)/sizeof(arr[0]); HuffmanCodes(arr, frequency, size); getch(); }