Page 1 of 1

huffman

Posted: Tue Jan 13, 2004 5:44 pm
by df
hmm we were talking huffman the other day... so I was bored and punched out the start of the huffman.. statisical analysis of file... all you need to is put it into a tree.. voila. optimal huffman codes.

Code: Select all

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct freq
{
   struct freq *next, *prev;
   int   item;
   int count;
};

#define ALLOCNODE   ((struct freq*)calloc(1, sizeof(struct freq)))

void MoveNodeUp(struct freq *n);
void DumpNodes(void);
void AddNode(unsigned char x);
struct freq *FindNode(unsigned char x);
void BalanceNode(struct freq *f);

/* global vars */
struct freq *head;
struct freq *tail;
FILE *fp;

int main(int argc, char *argv[])
{
   unsigned long offs, flen;
   unsigned char b;
   unsigned char buffer[1024];

   struct freq *f;
      
   if(argc<2)
      exit(0);
   else
   fp = fopen(argv[1], "rb");
   if(fp!=NULL)
   {
      printf("init freq table\n");

      head = ALLOCNODE;
      tail = ALLOCNODE;

      head->next = tail;
      tail->prev = head;
      
      offs = 0;
      
      flen=fread(buffer, 1, 1024, fp);
      
      while (flen > 0)
      {
         b = buffer[ offs++ ];

         // do the stat count!
         f = FindNode(b);
         if(f == NULL)
         {
            // insert new node
            AddNode(b);
         }
         else
         {
            f->count++;
            BalanceNode(f);
         }
         
         if(offs==flen)
         {
            memset(buffer, 0x0, 1024);
            flen=fread(buffer, 1, 1024, fp);
            offs = 0;
         }
      }
      
      fclose(fp);
      
      DumpNodes();
   }
   else
   {
      printf("Unable to open file\n");
   }
}

struct freq *FindNode(unsigned char x)
{
   struct freq *h;

   h=head;

   do
   {
      if(h->item == x)
         return h;
      else
         h=h->next;
   }while(h!=tail);
   // if here, we didnt find anything
   return NULL;
}

void AddNode(unsigned char x)
{
   struct freq *f;

   f = ALLOCNODE;   
   f->item = x;
   f->count = 1;
   
   f->next = tail;
   f->prev = tail->prev;
   
   // insert between tail->prev and tail
   
   (tail->prev)->next = f;
   tail->prev = f;

   BalanceNode(f);
}

void DumpNodes(void)
{
   struct freq *f;
   int i;

   f=head->next;
   i=0;

   while(f!=tail)
   {
      if(f!=tail)
      {         
         switch(f->item)
         {
            case 0x20:
               printf("<spc>");
               break;
            case 0x0a:
               printf(" <cr>");
               break;
            case 0x0d:
               printf(" <lf>");
               break;
            case 0x09:
               printf("<tab>");
               break;
            case 0x08:
               printf(" <bs>");
               break;
            default:
               printf("    %c", f->item);
               break;
         }
                  
         printf(" : %5li  ", f->count);
         
         if(++i%5==0)
            printf("\n");
      }
      f=f->next;
   }
   printf("\n\n");

   i=0;
   f=head->next;
   while(f!=tail)
   {
      switch(f->item)
      {
         case 0x0A:
         case 0x0D:
            printf("~");
            break;         
         default:
            printf("%c", f->item);
            break;
      }
      f=f->next;
      i++;
   }
   
   printf("\n%li characters\n\n", i);
}

void BalanceNode(struct freq *f)
{
   // we just incremented f, so check its neighbors.
   struct freq *a, *b;

   if(f->prev == head)
      return;

   a = f->prev;
   b = f;

   while(a->count < b->count)
   {
      MoveNodeUp(b);

      a = f->prev;
      b = f;
      
      if(a == head)
         return;
   }

   a = f->prev;
   b = f;

   while(a->count == b->count &&  a->item > b->item)
   {
      MoveNodeUp(b);
      
      a = f->prev;
      b = f;
      
      if(a == head)
         return;
   }
}

void MoveNodeUp(struct freq *n)
{
   struct freq *a, *b, *c, *d;
   
   c = n;
   d = n->next;
   b = n->prev;
   a = b->prev;

   // a, b, c, d -> a, c, b, d
   
   a->next = c;
   c->next = b;
   b->next = d;
   
   d->prev = b;
   b->prev = c;
   c->prev = a;
}