带权路径长度最小的二叉树称为最优二叉树,也称Huffman树(哈夫曼树)。
Huffman树的构造
将节点的权值存入数组中,由数组开始构造Huffman树。初始化指针数组,指针指向含有权值的孤立节点。
b = malloc(n*sizeof(BTreeNode));
for (i = 0; i < n; i++) {b[i] = malloc(sizeof(BTreeNode));b[i]->data = a[i];b[i]->left = NULL;b[i]->right = NULL;
}
数组b中的指针可以理解为二叉树的根指针。
进行n - 1次循环建立Huffman树
选择b中根节点权值最小的两棵二叉树作为左右子树组成新的二叉树,新二叉树的根节点权值为这两棵二叉树根节点权值的和。
将新二叉树添加到b中,并从b中删除原来的两棵二叉树。当b中只有一棵树时终止循环。
// k1 ends up as the index of the lightest tree in the forest,
// k2 as the index of the second lightest.
int k1 = -1, k2;

// Seed k1 and k2 with the first two trees that are still present in b.
for (j = 0; j < n; j++) {
    if (b[j] == NULL)
        continue;
    if (k1 == -1) {
        k1 = j;
        continue;
    }
    k2 = j;
    break;
}

// Scan from k2 onwards, keeping k1 = smallest and k2 = second smallest root weight.
for (j = k2; j < n; j++) {
    if (b[j] == NULL)
        continue;
    if (b[j]->data < b[k1]->data) {
        k2 = k1;
        k1 = j;
    } else if (b[j]->data < b[k2]->data) {
        k2 = j;
    }
}

// Join the two lightest trees under a new root q whose weight is their sum.
q = malloc(sizeof(BTreeNode));
q->data = b[k1]->data + b[k2]->data;
q->left = b[k1];
q->right = b[k2];

// The merged tree takes slot k1; slot k2 becomes empty.
b[k1] = q;
b[k2] = NULL;
Huffman编码与解码
首先给出求带权路径的递归实现:
/*
 * Computes the weighted path length of the tree rooted at FBT: the sum over
 * all leaves of (leaf weight * depth of the leaf).
 *
 * FBT - root of the (sub)tree; NULL denotes an empty tree.
 * len - depth of FBT below the original root; pass 0 for the top-level call.
 *
 * Returns 0 for an empty tree.
 */
double WeightPathLength(BTreeNode* FBT, int len)
{
    // An empty tree contributes nothing.
    if (FBT == NULL)
        return 0;

    // Leaf: its contribution is weight * depth (computed in double to avoid int overflow).
    if (FBT->left == NULL && FBT->right == NULL)
        return (double)FBT->data * len;

    // Internal node: add up both subtrees, one level deeper.
    return WeightPathLength(FBT->left, len + 1)
         + WeightPathLength(FBT->right, len + 1);
}
上述算法实际上通过双递归遍历了Huffman树。
改进上述算法得到求哈夫曼编码的实现:
static int index = 0;   // position in c[] of the label printed for the next leaf
char *c;                // symbol labels, one per leaf

/*
 * Writes one line per leaf of the tree rooted at FBT to fp, in the form
 * "<label> <weight>:<bits>", where <bits> is the 0/1 path from the root
 * (0 = left branch, 1 = right branch).
 *
 * fp  - output stream.
 * FBT - root of the (sub)tree; NULL is ignored.
 * len - depth of FBT below the original root; pass 0 for the top-level call.
 *
 * NOTE(review): labels are taken from c[] in the order leaves are visited,
 * which in general is not the order of the weights passed to the tree
 * builder, so a label may be printed next to another symbol's weight.
 * Confirm whether nodes should carry their own symbol.
 */
void HuffManCoding(FILE *fp, BTreeNode* FBT, int len)
{
    // Path buffer shared by all recursion levels; it must hold at least as
    // many entries as the longest root-to-leaf path.
    enum { MAX_CODE_LEN = 32 };
    static int a[MAX_CODE_LEN];
    int i;

    if (FBT == NULL)
        return;

    if (FBT->left == NULL && FBT->right == NULL) {
        // Leaf: print its label, weight and the bits recorded on the way down.
        fprintf(fp, "%c %d:", c[index++], FBT->data);
        for (i = 0; i < len; i++)
            fprintf(fp, "%d", a[i]);
        fprintf(fp, "\n");
        return;
    }

    // Refuse to descend past the end of the path buffer.
    if (len >= MAX_CODE_LEN) {
        fprintf(stderr, "HuffManCoding: tree deeper than %d levels\n", MAX_CODE_LEN);
        return;
    }

    // Internal node: record the branch taken at this level, then recurse.
    a[len] = 0;
    HuffManCoding(fp, FBT->left, len + 1);
    a[len] = 1;
    HuffManCoding(fp, FBT->right, len + 1);
}
节点的Huffman编码由它在Huffman树中的位置决定。从根节点到任意节点有且仅有一条路径,且路径可以唯一确定节点。因此规定从左子结点经过编码为0,从右子结点经过编码为1,路径序列作为编码。
由Huffman树和Huffman编码的性质可知,Huffman编码是一种不等长编码。在构造过程中,两个权值较小的节点生成一棵新的二叉树,新根节点的权值为左右子节点权值之和,并不实际代表字符;字符只出现在叶子节点上。由于从根到某个叶子的路径不会经过其他叶子,任一字符的编码都不可能是另一字符编码的前缀,也就是说,较短的编码不可能是较长编码的前缀,解码时不会产生歧义。
Huffman树自底向上(从叶子到根)构造:权值小的节点先被合并,因而位于较深的层次;靠近根的字符节点,其权值与若干靠近叶子的节点权值之和相当。故而权值越高的字符节点离根越近,编码也越短。
解码过程可以由字符串匹配来完成:
// Decoding: at each position i of the bit string, look for the first code
// word that is a prefix of the remaining bits.
for (i = 0; code[i]; i++) {
    for (j = 0; j < n; j++) {
        // Advance k while code word j agrees with the bits starting at i.
        k = 0;
        while (coding[j][k] && code[i + k] == coding[j][k])
            k++;

        // The whole code word matched iff the scan stopped at its terminator.
        t = (coding[j][k] == '\0');

        if (t == 1) {
            append(out, c[j]);
            // Skip the consumed bits; the outer i++ supplies the final step.
            i = i + k - 1;
            break;
        }
    }
}
printf("%s\n",out);
//Huffman.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * One node of a Huffman tree.
 *
 * The struct carries a tag so that left/right really point to this node type.
 * sym is the index of the symbol a leaf stands for (its position in the
 * weight array given to CreateHuffman); internal nodes use -1.
 */
typedef struct BTreeNode {
    int data;                   // weight of the subtree rooted here
    struct BTreeNode *left;
    struct BTreeNode *right;
    int sym;                    // leaf: symbol index; internal node: -1
} BTreeNode;

// Maximum number of symbols; also the size of one code-word buffer.
#define M 32

// coding[i] is the NUL-terminated '0'/'1' code word of symbol c[i].
char coding[M][M];

// Releases every node of the tree rooted at FBT (NULL is accepted).
static void FreeHuffman(BTreeNode *FBT)
{
    if (FBT == NULL)
        return;
    FreeHuffman(FBT->left);
    FreeHuffman(FBT->right);
    free(FBT);
}

/*
 * Builds a Huffman tree from n weights.
 *
 * a - array of n weights; leaf i gets weight a[i] and sym == i.
 * n - number of weights.
 *
 * Returns the root of the tree (a single leaf when n == 1), or NULL when
 * n <= 0, a is NULL, or memory runs out. The caller owns the tree.
 */
BTreeNode* CreateHuffman(int a[], int n)
{
    int i, j;
    BTreeNode **b, *q;

    if (a == NULL || n <= 0)
        return NULL;

    // b is the forest: an array of n root pointers.
    b = malloc((size_t)n * sizeof *b);
    if (b == NULL)
        return NULL;
    for (i = 0; i < n; i++)
        b[i] = NULL;

    // Every weight starts out as an isolated single-node tree.
    for (i = 0; i < n; i++) {
        b[i] = malloc(sizeof *b[i]);
        if (b[i] == NULL)
            goto fail;
        b[i]->data = a[i];
        b[i]->left = NULL;
        b[i]->right = NULL;
        b[i]->sym = i;
    }

    // With a single weight no merge happens and the lone leaf is the tree.
    q = b[0];

    // n - 1 merges reduce the forest to one tree.
    for (i = 1; i < n; i++) {
        int k1 = -1, k2 = -1;

        // Seed k1 and k2 with the first two trees still in the forest.
        for (j = 0; j < n; j++) {
            if (b[j] == NULL)
                continue;
            if (k1 == -1) {
                k1 = j;
                continue;
            }
            k2 = j;
            break;
        }

        // Keep k1 = lightest root and k2 = second lightest root.
        for (j = k2; j < n; j++) {
            if (b[j] == NULL)
                continue;
            if (b[j]->data < b[k1]->data) {
                k2 = k1;
                k1 = j;
            } else if (b[j]->data < b[k2]->data) {
                k2 = j;
            }
        }

        // Join the two lightest trees under a new root.
        q = malloc(sizeof *q);
        if (q == NULL)
            goto fail;
        q->data = b[k1]->data + b[k2]->data;
        q->left = b[k1];
        q->right = b[k2];
        q->sym = -1;
        b[k1] = q;
        b[k2] = NULL;
    }

    free(b);
    return q;

fail:
    // Every tree built so far is still reachable from some slot of b.
    for (j = 0; j < n; j++)
        FreeHuffman(b[j]);
    free(b);
    return NULL;
}

/*
 * Computes the weighted path length of the tree rooted at FBT: the sum over
 * all leaves of (leaf weight * depth). Pass len = 0 for the top-level call.
 * Returns 0 for an empty tree.
 */
double WeightPathLength(BTreeNode* FBT, int len)
{
    if (FBT == NULL)
        return 0;
    if (FBT->left == NULL && FBT->right == NULL)
        return (double)FBT->data * len;
    return WeightPathLength(FBT->left, len + 1)
         + WeightPathLength(FBT->right, len + 1);
}

// Symbol table: c[i] is the character whose weight was passed as a[i].
char *c;

/*
 * Writes one line "<symbol> <weight>:<bits>" per leaf to fp and stores the
 * same bits, as a string, in coding[sym]. A left branch contributes 0 and a
 * right branch 1. A tree that consists of a single leaf gets the code "0"
 * so that no symbol ever has an empty code word.
 *
 * fp  - output stream.
 * FBT - root of the (sub)tree; NULL is ignored.
 * len - depth of FBT below the original root; pass 0 for the top-level call.
 *
 * Leaves whose sym lies outside [0, M) are skipped, and subtrees deeper than
 * M - 1 levels are reported on stderr and not descended into.
 */
void HuffManCoding(FILE *fp, BTreeNode* FBT, int len)
{
    // Path buffer shared by all recursion levels.
    static int a[M];
    int i;

    if (FBT == NULL)
        return;

    if (FBT->left == NULL && FBT->right == NULL) {
        if (FBT->sym < 0 || FBT->sym >= M || c == NULL)
            return;
        if (len == 0) {
            a[0] = 0;
            len = 1;
        }
        fprintf(fp, "%c %d:", c[FBT->sym], FBT->data);
        for (i = 0; i < len; i++) {
            fprintf(fp, "%d", a[i]);
            coding[FBT->sym][i] = (char)('0' + a[i]);
        }
        coding[FBT->sym][len] = '\0';
        fprintf(fp, "\n");
        return;
    }

    // A child at depth len + 1 needs len + 1 bits plus the terminator in coding[].
    if (len >= M - 1) {
        fprintf(stderr, "HuffManCoding: code longer than %d bits\n", M - 1);
        return;
    }

    a[len] = 0;
    HuffManCoding(fp, FBT->left, len + 1);
    a[len] = 1;
    HuffManCoding(fp, FBT->right, len + 1);
}

// Appends ch to the NUL-terminated string str; the caller guarantees room.
void append(char *str, char ch)
{
    size_t end = strlen(str);

    str[end] = ch;
    str[end + 1] = '\0';
}

/*
 * Reads the symbol table from test.in, writes the code table to code.txt,
 * then encodes the first word of src.in, prints the bit string, decodes it
 * again and prints the result.
 */
int main(void)
{
    int i, j, k, n, t;
    int status = EXIT_FAILURE;
    int *arr = NULL;
    char in[M] = {'\0'}, code[M*M] = {'\0'}, out[M] = {'\0'};
    char fmt[16];
    size_t codelen = 0, outlen = 0;
    BTreeNode *fbt = NULL;
    FILE *fp = NULL;

    // Input
    if (freopen("test.in", "r", stdin) == NULL) {
        perror("test.in");
        return EXIT_FAILURE;
    }
    if (scanf("%d", &n) != 1 || n < 1 || n > M) {
        fprintf(stderr, "test.in: symbol count must be between 1 and %d\n", M);
        return EXIT_FAILURE;
    }
    arr = malloc((size_t)n * sizeof *arr);
    c = malloc((size_t)n * sizeof *c);
    if (arr == NULL || c == NULL) {
        fprintf(stderr, "out of memory\n");
        goto done;
    }
    // The blank cannot be read with the format below, so it is fixed here.
    arr[0] = 186;
    c[0] = ' ';
    for (i = 1; i < n; i++) {
        // The leading blank in the format skips the previous line ending.
        if (scanf(" %c %d", &c[i], &arr[i]) != 2) {
            fprintf(stderr, "test.in: malformed entry %d\n", i);
            goto done;
        }
    }

    // Huffman coding: build the tree, then write code.txt and fill coding[].
    fbt = CreateHuffman(arr, n);
    if (fbt == NULL) {
        fprintf(stderr, "cannot build the Huffman tree\n");
        goto done;
    }
    fp = fopen("code.txt", "w");
    if (fp == NULL) {
        perror("code.txt");
        goto done;
    }
    HuffManCoding(fp, fbt, 0);
    if (fclose(fp) != 0) {
        fp = NULL;
        perror("code.txt");
        goto done;
    }
    fp = NULL;

    // Encoding: read one whitespace-delimited word, bounded by the size of in[].
    fp = fopen("src.in", "r");
    if (fp == NULL) {
        perror("src.in");
        goto done;
    }
    snprintf(fmt, sizeof fmt, "%%%ds", M - 1);
    if (fscanf(fp, fmt, in) != 1)
        in[0] = '\0';
    fclose(fp);
    fp = NULL;

    for (i = 0; in[i]; i++) {
        for (j = 0; j < n; j++) {
            if (c[j] != in[i])
                continue;
            size_t bits = strlen(coding[j]);
            if (codelen + bits >= sizeof code) {
                fprintf(stderr, "encoded text too long\n");
                goto done;
            }
            memcpy(code + codelen, coding[j], bits + 1);
            codelen += bits;
            break;
        }
        if (j == n)
            fprintf(stderr, "warning: '%c' is not in the symbol table, skipped\n", in[i]);
    }
    printf("%s\n", code);

    // Decoding: at each position take the first code word that prefixes the rest.
    for (i = 0; code[i]; i++) {
        for (j = 0; j < n; j++) {
            // An empty code word would match without consuming any bit.
            if (coding[j][0] == '\0')
                continue;
            t = 1;
            for (k = 0; coding[j][k]; k++) {
                if (code[i + k] != coding[j][k]) {
                    t = 0;
                    break;
                }
            }
            if (t == 1) {
                if (outlen + 1 >= sizeof out) {
                    fprintf(stderr, "decoded text too long\n");
                    goto done;
                }
                append(out, c[j]);
                outlen++;
                // Skip the consumed bits; the outer i++ supplies the final step.
                i = i + k - 1;
                break;
            }
        }
    }
    printf("%s\n", out);
    status = EXIT_SUCCESS;

done:
    if (fp != NULL)
        fclose(fp);
    FreeHuffman(fbt);
    free(arr);
    free(c);
    c = NULL;
    return status;
}
测试数据:
test.in:
27
a 4
b 13
c 22
d 32
e 103
f 21
g 15
h 47
i 57
j 1
k 5
l 32
m 20
n 57
o 63
p 15
q 1
r 48
s 51
t 80
u 23
v 8
w 18
x 1
y 16
z 1