经典的Hash函数

PHP中出现的字符串Hash函数

/*
 * PJW-style hash over the first nKeyLength bytes of arKey.
 *
 * Each byte is folded in by shifting the accumulator left four bits and
 * adding the byte. Whenever any of bits 28..31 end up set, those bits are
 * XORed back into the accumulator shifted right by 24 and then cleared.
 *
 * arKey      - start of the key bytes; the loop is bounded by nKeyLength,
 *              not by a terminator
 * nKeyLength - number of bytes to hash
 * returns    - the accumulated hash; 0 when nKeyLength is 0
 */
static unsigned long hashpjw(char *arKey, unsigned int nKeyLength)
{
    unsigned long hash = 0;
    unsigned int idx;

    for (idx = 0; idx < nKeyLength; idx++) {
        unsigned long top;

        hash = (hash << 4) + arKey[idx];
        top = hash & 0xF0000000;
        if (top != 0) {
            /* Fold the top nibble back into the low bits, then drop it. */
            hash ^= top >> 24;
            hash ^= top;
        }
    }
    return hash;
}

OpenSSL中出现的字符串Hash函数

/*
 * Hash a NUL-terminated string by XOR-folding it two bytes at a time.
 *
 * The string is consumed as (strlen(str) + 1) / 2 words of type
 * unsigned short, loaded in host byte order; for an odd-length string the
 * last word therefore includes the terminating NUL. Word i is shifted left
 * by (i & 0x0f) bits before being XORed into the result.
 *
 * str     - string to hash; may be NULL
 * returns - 0 for NULL or "", otherwise the folded value. Because words are
 *           loaded natively, the value depends on the host byte order.
 */
unsigned long lh_strhash(char *str)
{
    unsigned long ret = 0;
    size_t i, words;

    if (str == NULL)
        return 0;

    words = (strlen(str) + 1) / 2;
    for (i = 0; i < words; ++i) {
        unsigned short word;

        /* Load via memcpy rather than through an (unsigned short *) cast:
         * str has no alignment guarantee, and reading char storage through
         * an unsigned short lvalue violates the aliasing rules. */
        memcpy(&word, str + i * sizeof word, sizeof word);
        ret ^= (unsigned long) word << (i & 0x0f);
    }
    return ret;
}

/* The following hash seems to work very well on normal text strings
 * no collisions on /usr/dict/words and it distributes on %2^n quite
 * well, not as good as MD5, but still good.
 *
 * c       - NUL-terminated string to hash; may be NULL
 * returns - 0 for NULL or "", otherwise the mixed hash value
 */
unsigned long lh_strhash(const char *c)
{
    unsigned long ret = 0;
    long n;
    unsigned long v;
    int r;

    if ((c == NULL) || (*c == '\0'))
        return (ret);

    n = 0x100;
    while (*c)
    {
        /* Combine the character with a counter that advances by 0x100
         * for every position in the string. */
        v = n | (*c);
        n += 0x100;
        /* Rotation amount in the range 0..15. */
        r = (int) ((v >> 2)^v)&0x0f;
        /* Rotate left by r within 32 bits. The right shift by (32 - r) is
         * split into two steps so that no single shift count reaches 32
         * when r is 0, which would be undefined for a 32-bit unsigned long. */
        ret = (ret << r) | ((ret >> 16) >> (16 - r));
        ret &= 0xFFFFFFFFL;
        ret ^= v*v;
        c++;
    }
    return ((ret >> 16)^ret);
}

MySQL中出现的字符串Hash函数

/*
 * Calc hash for a key.
 *
 * Visits the `length` bytes starting at `key` in order. Each byte, read as
 * an unsigned value, is multiplied by a term built from the low six bits of
 * the running hash plus a per-position weight (starting at 4, growing by 3
 * per byte); the product plus the running hash shifted left by 8 is XORed
 * into the running hash, which starts at 1.
 */
static uint calc_hashnr(const byte *key, uint length)
{
    uint hash = 1;
    uint weight = 4;
    const byte *cursor = key;
    uint remaining;

    for (remaining = length; remaining != 0; remaining--)
    {
        uint ch = (uint) (uchar) *cursor++;

        hash ^= (((hash & 63) + weight) * ch) + (hash << 8);
        weight += 3;
    }
    return hash;
}

/*
 * Calc hash for a key, case independently.
 *
 * Same mixing step as the case-sensitive variant, except that every byte
 * is passed through toupper() before it is mixed in, so keys differing
 * only in letter case (as toupper() sees it) hash to the same value.
 *
 * key     - start of the key bytes
 * length  - number of bytes to hash
 * returns - the hash value; 1 when length is 0
 */
static uint calc_hashnr_caseup(const byte *key, uint length)
{
    uint nr = 1, nr2 = 4;

    while (length--)
    {
        /* <ctype.h> functions require an argument representable as
         * unsigned char (or EOF); convert first in case byte is a signed
         * type and the key contains bytes above 0x7F. */
        uint ch = (uint) (uchar) toupper((uchar) *key++);

        nr ^= (((nr & 63) + nr2) * ch) + (nr << 8);
        nr2 += 3;
    }
    return nr;
}
#else

/*
 * Fowler/Noll/Vo hash
 *
 * The underlying idea was sent by email to the IEEE Posix P1003.2 mailing
 * list by Phong Vo (kpv@research.att.com) and Glenn Fowler
 * (gsf@research.att.com); Landon Curt Noll (chongo@toad.com) later improved
 * on their algorithm.
 *
 * The magic lies in the relationship between the special prime
 * 16777619 (2^24 + 403) and 2^32 and 2^8.
 *
 * This hash produces the fewest collisions of any that we've seen so
 * far, and works well on both numbers and strings.
 *
 * For each of the `len` bytes at `key`, the running hash (initially 0) is
 * multiplied by the prime and then XORed with the byte read as unsigned.
 */
uint calc_hashnr(const byte *key, uint len)
{
    const uchar *bytes = (const uchar *) key;
    uint hash = 0;
    uint i;

    for (i = 0; i < len; i++)
    {
        hash *= 16777619;
        hash ^= (uint) bytes[i];
    }
    return hash;
}

/*
 * Case-insensitive variant of the multiply-by-16777619 / XOR hash.
 *
 * For each of the `len` bytes at `key`, the running hash (initially 0) is
 * multiplied by 16777619 and then XORed with toupper() of the byte.
 *
 * key     - start of the key bytes
 * len     - number of bytes to hash
 * returns - the hash value; 0 when len is 0
 */
uint calc_hashnr_caseup(const byte *key, uint len)
{
    const byte *end = key + len;
    uint hash;

    for (hash = 0; key < end; key++)
    {
        hash *= 16777619;
        /* <ctype.h> functions require an argument representable as
         * unsigned char (or EOF); convert first in case byte is a signed
         * type and the key contains bytes above 0x7F. */
        hash ^= (uint) (uchar) toupper((uchar) *key);
    }
    return (hash);
}

C++ STL 中的hash函数

  // Hashes a NUL-terminated C string: starting from 0, the accumulator is
  // multiplied by 5 and the next character is added, for every character
  // up to (not including) the terminator. An empty string hashes to 0.
  inline size_t  __stl_hash_string(const char* __s)
  {
    unsigned long __acc = 0;
    while (*__s != '\0') {
      __acc = 5 * __acc + *__s;
      ++__s;
    }
    return static_cast<size_t>(__acc);
  }

另一个经典的hash函数（这个函数跟 STL 的hash 函数几乎是一样的）

/*
 * Multiplicative string hash with factor 31.
 *
 * Starting from 0, the accumulator is multiplied by 31 and the next byte,
 * read as unsigned char, is added; this repeats for every byte before the
 * terminating NUL.
 *
 * str     - NUL-terminated string to hash
 * returns - the accumulated hash; 0 for an empty string
 */
unsigned int hash(char *str)
{
    const unsigned char *cursor = (unsigned char *) str;
    unsigned int acc = 0;

    while (*cursor != '\0') {
        acc = acc * 31 + *cursor;
        cursor++;
    }
    return acc;
}

测试及结果

测试说明

从上面给出的经典字符串Hash函数中可以看出，有的涉及到字符串大小写敏感问题，我们的测试中只考虑字符串大小写敏感的函数，另外在上面的函数中有的函数需要长度参数，有的不需要长度参数，这对函数本身的效率有一定的影响，我们的测试中将对函数稍微作一点修改，全部使用长度参数，并将函数内部出现的计算长度代码删除。
我们用来作测试用的Hash链表采用经典的拉链法解决冲突，另外我们采用静态分配桶（Hash链表长度）的方法来构造Hash链表，这主要是为了简化我们的实现，并不影响我们的测试结果。
测试文本采用单词表，测试过程中从一个输入文件中读取全部不重复单词构造一个Hash表，测试内容分别是函数总调用次数、函数总调用时间、最大拉链长度、平均拉链长度、桶利用率（使用过的桶所占的比率），其中函数总调用次数是指Hash函数被调用的总次数，为了测试出函数执行时间，该值在测试过程中作了一定的放大，函数总调用时间是指Hash函数总的执行时间，最大拉链长度是指使用拉链法构造链表过程中出现的最大拉链长度，平均拉链长度指拉链的平均长度。
测试过程中使用的机器配置如下：
PIII600笔记本，128M内存，windows 2000 server操作系统。
测试结果

以下分别是对两个不同文本文件中的全部不重复单词构造Hash链表的测试结果，测试结果中函数调用次数放大了100倍，相应的函数调用时间也放大了100倍。

从下表可以看出，这些经典软件虽然构造字符串Hash函数的方法不同，但是它们的效率都是不错的，相互之间差距很小。

经典的Hash函数

原文链接: https://www.cnblogs.com/gcssys/archive/2013/03/25/3790311.html

欢迎关注

微信关注下方公众号，第一时间获取干货硬货；公众号内回复【pdf】免费获取数百本计算机经典书籍

原创文章受到原创版权保护。转载请注明出处：https://www.ccppcoding.com/archives/82010

非原创文章文中已经注明原地址，如有侵权，联系删除

关注公众号【高性能架构探索】，第一时间获取最新文章

转载文章受原作者版权保护。转载请注明原作者出处！

经典的Hash函数

相关推荐