针对已排序输入的高效赫夫曼编码


在之前的赫夫曼编码问题中,频率是未排序的。如果频率列表以排序顺序给出,则分配编码的任务将变得更加高效。

在此问题中,我们将使用两个空队列。然后为每个唯一字符创建叶节点,并按频率递增顺序将其插入队列。

在此方法中,该算法的复杂度为 O(n)。

输入和输出

Input:
Different letters and their frequency in sorted order
Letters: {L, K, X, C, E, B, A, F}
Frequency: {1, 1, 2, 2, 2, 2, 3, 4}
Output:
Codes for the letters
L: 0000
K: 0001
X: 001
C: 010
E: 011
F: 10
B: 110
A: 111

算法

huffmanCodes(dataList, freqList, n)

输入:数据列表、频率列表以及列表中的数据数目 n。

输出 −分配给代码的字符。

Begin
   root := huffmanTree(dataList, freqList, n) //create root of Huffman tree create an array to store codes, and top pointer for that array.
   call getCodes(root, array, top) to find codes for each character.
End

getCodes(root :node, array, top)

输入:根节点、用于存储代码的数组、数组的顶部。

输出 −每个字符的代码

Begin
   if leftChild(root) ≠φ then
      array[top] := 0
      getCodes(leftChild(root), array, top)
   if rightChild(root) ≠φ then
      array[top] = 1
      getCode(rightChild(root), array, top)
   if leftChild(root) = φ AND rightChild(root) = φ then
      display the character ch of root
      for all entries of the array do
         display the code in array[i] for character ch
      done
End

huffmanTree(dataList, freqList, n)

输入 −数据列表、频率列表以及列表中的数据数目 n。

输出 −创建一棵赫夫曼树

Begin
   for all different character ch do
      add node with ch and frequency of ch into queue q1
   done

   while q1 is not empty OR size of q2 ≠ 1 do
      find two minimum node using q1 and q2 and add them as left and
      right child of a new node.
      add new node in q2
   done

   delete node from q2 and return that node.
End

示例

#include<iostream>
#include<queue>
using namespace std;

struct node {
   char data;
   int freq;
   node *child0, *child1;
};

node *getNode(char d, int f) {
   node *newNode = new node;
   newNode->data = d;
   newNode->freq = f;
   newNode->child0 = NULL;
   newNode->child1 = NULL;
   return newNode;
}

node *findMinNode(queue<node*>&q1, queue<node*>&q2) {
   node *minNode;
   if(q1.empty()) { //if first queue is empty, delete and return node from second queue
      minNode = q2.front();
      q2.pop();
      return minNode;
   }

   if(q2.empty()) { //if second queue is empty, delete and return node from first queue
      minNode = q1.front();
      q1.pop();
      return minNode;
   }

   if((q1.front()->freq) < (q2.front()->freq)) { //find smaller from two queues
      minNode = q1.front();
      q1.pop();
      return minNode;
   }else {
      minNode = q2.front();
      q2.pop();
      return minNode;
   }
}

node *huffmanTree(char data[], int frequency[], int n) {
   node *c0, *c1, *par;
   node *newNode;
   queue<node*> qu1, qu2;

   for(int i = 0; i<n; i++) { //add all node to queue 1
      newNode = getNode(data[i], frequency[i]);
      qu1.push(newNode);
   }

   while(!(qu1.empty() && (qu2.size() == 1))) {
      c0 = findMinNode(qu1, qu2); //find two minimum as two child
      c1 = findMinNode(qu1, qu2);
      node *newNode = getNode('#', c0->freq+c1->freq);

      //intermediate node holds special character
      par = newNode;
      par->child0 = c0;
      par->child1 = c1;
      qu2.push(par); //add sub tree into queue 2
   }

   node *retNode = qu2.front();
   qu2.pop();
   return retNode;
}

void getCodes(node *rootNode, int array[], int n) {  //array to store the code
   if(rootNode->child0 != NULL) {
      array[n] = 0;
      getCodes(rootNode->child0, array, n+1);
   }

   if(rootNode->child1 != NULL) {
      array[n] = 1;
      getCodes(rootNode->child1, array, n+1);
   }

   if(rootNode->child0 == NULL && rootNode->child1 == NULL) {  // when root is leaf node
      cout << rootNode->data << ": ";

      for(int i = 0; i<n; i++)
         cout << array[i];
      cout << endl;
   }
}

void huffmanCodes(char data[], int frequency[], int n) {
   node *rootNode = huffmanTree(data, frequency, n);
   int array[50], top = 0;
   getCodes(rootNode, array, top);
}

int main() {
   char data[] = {'L', 'K', 'X', 'C', 'E', 'B', 'A', 'F'};
   int frequency[] = {1, 1, 2, 2, 2, 2, 3, 4};
   int n = sizeof(data)/sizeof(data[0]);
   huffmanCodes(data, frequency, n);
}

输出

L: 0000
K: 0001
X: 001
C: 010
E: 011
F: 10
B: 110
A: 111

更新于:15-6 月-2020

525 次浏览

开启你的职业生涯

完成课程获得认证

开始
广告