Hash Function in C (I)

一.什麽是 雜湊函式?

hash function is one to one function?

ps.圖片摘自wiki

二.常見的雜湊函式形式

a.md5  http://en.wikipedia.org/wiki/MD5

b.sha1/256

c.Alder32 http://blog.csdn.net/russell_tao/article/details/7240661

d.perl/php hash

三.用途

a.加速搜尋速度

b.確保傳遞真實的信息

c.影藏資料的儲存方式

四.碰撞(Collision):兩個鍵值對應到同一個位置最壞的情況:化成 O(N) 搜尋

a. 最簡單的hash function:把每個字元的 ascii code 加起來 mod 100

ex.  123 = (31+32+33) mod 100 = 96

b. 實際上碰撞率很高的hash function 也有用途 (rsync)

A = 1 + D1 + D2 + ... + Dn (mod 65521)  

B = (1 + D1) + (1 + D1 + D2) + ... + (1 + D1 + D2 + ... + Dn) (mod 65521)  

  = n×D1 + (n−1)×D2 + (n−2)×D3 + ... + Dn + n (mod 65521)  

 

Adler-32(D) = B × 65536 + A

alder32/md5

s1:1234    5678   90

s2:1234    abc4   5678   90

ps. hash 值不同, 則原始資訊必不同 , hash 值相同因碰撞機率大需要使用md5確認

---

c. git 的應用

將內容做 SHA1("test1")= b444ac06613fc8d63795be9ad0beaf55011936ac , 將檔案存於 ./b4/44/b444ac06613fc8d63795be9ad0beaf55011936ac

紀錄 檔名  luke.txt → b444ac06613fc8d63795be9ad0beaf55011936ac

五. 安全性

php  前一段時間曾經修復 ddos 攻擊事件

http://my.oschina.net/keengo/blog/38648

md5 crack

http://md5.web-max.ca/

六.原始碼

djb2 是一個很神秘的hash function,他的發明人沒有說明為什麽是這些數字,但實際的統計上,這個簡單的function 讓ascii 的字元平均分散,perl / php … 很多語言都已這個基礎去實做自己的 函式

/*

 * hash.c

 *

 *  Created on: 2010/12/26

 *      Author: luke

 */

#include "xps_hash.h"

#define KEY_MAXLEN 64

#define VAL_MAXLEN 64

struct linklist {

        char k[KEY_MAXLEN];

        char v[VAL_MAXLEN];

        struct linklist *next;

};

struct linklist *record[32768];

static unsigned long djb2(const char *str) {

        unsigned long hash = 5381;

        unsigned char c;

        while ((c = *str++)) {

                hash = ((hash << 5) + hash) + c; /* hash * 33 + c */

        }

        /*

         x = 131 % 4; <- 4為2的倍數

         equals:

         x = 131 & (4 - 1);

         */

        // 32767 = 2^15-1

        return hash & 32767;

}

//array[idx]-> (insert to here) ->Tail->N2->N1->Head->Null

int myhash(char *key, char *var) {

        struct linklist *Node = NULL;

        struct linklist *Rec = NULL;

        struct linklist *Tail = NULL;

        unsigned long idx = djb2(key);

        //這個record 是否為空,如果不為空有兩種狀況

        //a.產生碰撞

        //b.這個key 之前已使用過

        if (record[idx] == NULL) {

                Node = (struct linklist *) calloc(1, sizeof(struct linklist));

                Node->next = NULL;

                //bzero(Node->k,KEY_MAXLEN);

                int len_key = strlen(key);

                if (len_key >= KEY_MAXLEN) {

                        len_key = KEY_MAXLEN - 1;

                }

                memcpy(Node->k, key, len_key);

                Node->k[len_key] = '\0';

                //bzero(Node->v, VAL_MAXLEN);

                int len_var = strlen(var);

                if (len_var >= VAL_MAXLEN) {

                        len_var = VAL_MAXLEN - 1;

                }

                memcpy(Node->v, var, len_var);

                Node->v[len_var] = '\0';

                record[idx] = Node;

        } else {

                Tail = record[idx];

                Rec = Tail;

                while (Rec != NULL) {

                        if (strcmp(Rec->k, key) == 0) {

                                //key 之前已被使用過

                                bzero(Rec->v, VAL_MAXLEN);

                                int len_var = strlen(var);

                                if (len_var >= VAL_MAXLEN) {

                                        len_var = VAL_MAXLEN - 1;

                                }

                                memcpy(Rec->v, var, len_var);

                                Rec->v[len_var] = '\0';

                                return 0;

                        }

                        Rec = Rec->next;

                }

                //因為碰撞而存在的資料

                Node->next = Tail;

                //bzero(Node->k, KEY_MAXLEN);

                int len_key = strlen(key);

                if (len_key >= KEY_MAXLEN) {

                        len_key = KEY_MAXLEN - 1;

                }

                memcpy(Node->k, key, len_key);

                Node->k[len_key] = '\0';

                //bzero(Node->v, VAL_MAXLEN);

                int len_var = strlen(var);

                if (len_var >= VAL_MAXLEN) {

                        len_var = VAL_MAXLEN - 1;

                }

                memcpy(Node->v, var, len_var);

                Node->v[len_var] = '\0';

                record[idx] = Node;

        }

        return 0;

}

//array[idx]->Tail->N4->N3->N2->N1->Head->Null

int myhash_delete(char *key) {

        unsigned long idx = djb2(key);

        struct linklist *Rec = NULL;

        struct linklist *Prev = NULL;

        struct linklist *Tail = NULL;

        struct linklist *delRec = NULL;

        int status = 0;

        Rec = record[idx];

        if (Rec == NULL) {

                return 0;

        }

        Tail = Rec;

        while (Rec != NULL) {

                Prev = Rec;

                if (strcmp(Rec->k, key) == 0) {

                        delRec = Rec;

                        status = 1;

                        Rec = Rec->next;

                        break;

                }

                Rec = Rec->next;

        }

        if (status == 1) {

                free(delRec);

                if (Prev == Tail) {

                        record[idx] = Rec;

                } else {

                        Prev->next = Rec;

                }

        }

        return 0;

}

char *myhash_find(const char *key) {

        if (key == NULL) {

                return NULL;

        }

        unsigned long idx = djb2(key);

        struct linklist *Rec = NULL;

        Rec = record[idx];

        while (Rec != NULL) {

                if (strcmp(Rec->k, key) == 0) {

                        return Rec->v;

                }

                Rec = Rec->next;

        }

        return NULL;

}

void search(void) {

        int i = 0;

        struct linklist *Rec = NULL;

        for (i = 0; i <= 30000; i++) {

                //printf("idx1:%d\n",i);

                Rec = record[i];

                //printf("idx2:%d\n",i);

                while (Rec != NULL) {

                        printf("search: %s:%s\n", Rec->k, Rec->v);

                        Rec = Rec->next;

                }

        }

}

六. hash function 的其他應用...