Hash Function in C (I)
一.什麽是 雜湊函式?
hash function is one to one function?
ps.圖片摘自wiki
二.常見的雜湊函式形式
a.md5 http://en.wikipedia.org/wiki/MD5
b.sha1/256
c.Alder32 http://blog.csdn.net/russell_tao/article/details/7240661
d.perl/php hash
三.用途
a.加速搜尋速度
b.確保傳遞真實的信息
c.影藏資料的儲存方式
四.碰撞(Collision):兩個鍵值對應到同一個位置最壞的情況:化成 O(N) 搜尋
a. 最簡單的hash function:把每個字元的 ascii code 加起來 mod 100
ex. 123 = (31+32+33) mod 100 = 96
b. 實際上碰撞率很高的hash function 也有用途 (rsync)
A = 1 + D1 + D2 + ... + Dn (mod 65521)
B = (1 + D1) + (1 + D1 + D2) + ... + (1 + D1 + D2 + ... + Dn) (mod 65521)
= n×D1 + (n−1)×D2 + (n−2)×D3 + ... + Dn + n (mod 65521)
Adler-32(D) = B × 65536 + A
alder32/md5
s1:1234 5678 90
s2:1234 abc4 5678 90
ps. hash 值不同, 則原始資訊必不同 , hash 值相同因碰撞機率大需要使用md5確認
---
c. git 的應用
將內容做 SHA1("test1")= b444ac06613fc8d63795be9ad0beaf55011936ac , 將檔案存於 ./b4/44/b444ac06613fc8d63795be9ad0beaf55011936ac
紀錄 檔名 luke.txt → b444ac06613fc8d63795be9ad0beaf55011936ac
五. 安全性
php 前一段時間曾經修復 ddos 攻擊事件
http://my.oschina.net/keengo/blog/38648
md5 crack
六.原始碼
djb2 是一個很神秘的hash function,他的發明人沒有說明為什麽是這些數字,但實際的統計上,這個簡單的function 讓ascii 的字元平均分散,perl / php … 很多語言都已這個基礎去實做自己的 函式
/*
* hash.c
*
* Created on: 2010/12/26
* Author: luke
*/
#include "xps_hash.h"
#define KEY_MAXLEN 64
#define VAL_MAXLEN 64
struct linklist {
char k[KEY_MAXLEN];
char v[VAL_MAXLEN];
struct linklist *next;
};
struct linklist *record[32768];
static unsigned long djb2(const char *str) {
unsigned long hash = 5381;
unsigned char c;
while ((c = *str++)) {
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
}
/*
x = 131 % 4; <- 4為2的倍數
equals:
x = 131 & (4 - 1);
*/
// 32767 = 2^15-1
return hash & 32767;
}
//array[idx]-> (insert to here) ->Tail->N2->N1->Head->Null
int myhash(char *key, char *var) {
struct linklist *Node = NULL;
struct linklist *Rec = NULL;
struct linklist *Tail = NULL;
unsigned long idx = djb2(key);
//這個record 是否為空,如果不為空有兩種狀況
//a.產生碰撞
//b.這個key 之前已使用過
if (record[idx] == NULL) {
Node = (struct linklist *) calloc(1, sizeof(struct linklist));
Node->next = NULL;
//bzero(Node->k,KEY_MAXLEN);
int len_key = strlen(key);
if (len_key >= KEY_MAXLEN) {
len_key = KEY_MAXLEN - 1;
}
memcpy(Node->k, key, len_key);
Node->k[len_key] = '\0';
//bzero(Node->v, VAL_MAXLEN);
int len_var = strlen(var);
if (len_var >= VAL_MAXLEN) {
len_var = VAL_MAXLEN - 1;
}
memcpy(Node->v, var, len_var);
Node->v[len_var] = '\0';
record[idx] = Node;
} else {
Tail = record[idx];
Rec = Tail;
while (Rec != NULL) {
if (strcmp(Rec->k, key) == 0) {
//key 之前已被使用過
bzero(Rec->v, VAL_MAXLEN);
int len_var = strlen(var);
if (len_var >= VAL_MAXLEN) {
len_var = VAL_MAXLEN - 1;
}
memcpy(Rec->v, var, len_var);
Rec->v[len_var] = '\0';
return 0;
}
Rec = Rec->next;
}
//因為碰撞而存在的資料
Node->next = Tail;
//bzero(Node->k, KEY_MAXLEN);
int len_key = strlen(key);
if (len_key >= KEY_MAXLEN) {
len_key = KEY_MAXLEN - 1;
}
memcpy(Node->k, key, len_key);
Node->k[len_key] = '\0';
//bzero(Node->v, VAL_MAXLEN);
int len_var = strlen(var);
if (len_var >= VAL_MAXLEN) {
len_var = VAL_MAXLEN - 1;
}
memcpy(Node->v, var, len_var);
Node->v[len_var] = '\0';
record[idx] = Node;
}
return 0;
}
//array[idx]->Tail->N4->N3->N2->N1->Head->Null
int myhash_delete(char *key) {
unsigned long idx = djb2(key);
struct linklist *Rec = NULL;
struct linklist *Prev = NULL;
struct linklist *Tail = NULL;
struct linklist *delRec = NULL;
int status = 0;
Rec = record[idx];
if (Rec == NULL) {
return 0;
}
Tail = Rec;
while (Rec != NULL) {
Prev = Rec;
if (strcmp(Rec->k, key) == 0) {
delRec = Rec;
status = 1;
Rec = Rec->next;
break;
}
Rec = Rec->next;
}
if (status == 1) {
free(delRec);
if (Prev == Tail) {
record[idx] = Rec;
} else {
Prev->next = Rec;
}
}
return 0;
}
char *myhash_find(const char *key) {
if (key == NULL) {
return NULL;
}
unsigned long idx = djb2(key);
struct linklist *Rec = NULL;
Rec = record[idx];
while (Rec != NULL) {
if (strcmp(Rec->k, key) == 0) {
return Rec->v;
}
Rec = Rec->next;
}
return NULL;
}
void search(void) {
int i = 0;
struct linklist *Rec = NULL;
for (i = 0; i <= 30000; i++) {
//printf("idx1:%d\n",i);
Rec = record[i];
//printf("idx2:%d\n",i);
while (Rec != NULL) {
printf("search: %s:%s\n", Rec->k, Rec->v);
Rec = Rec->next;
}
}
}
六. hash function 的其他應用...