blob: 2aa9de7d95b5fefbc44663f418a5d64003a286ed [file] [log] [blame]
/*
* Copyright 2015 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Spooky Hash
// A 128-bit noncryptographic hash, for checksums and table lookup
// By Bob Jenkins. Public domain.
// Oct 31 2010: published framework, disclaimer ShortHash isn't right
// Nov 7 2010: disabled ShortHash
// Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
// April 10 2012: buffer overflow on platforms without unaligned reads
// July 12 2012: was passing out variables in final to in/out in short
// July 30 2012: I reintroduced the buffer overflow
#include <folly/SpookyHashV1.h>
#include <cstring>
#define ALLOW_UNALIGNED_READS 1
namespace folly {
namespace hash {
//
// short hash ... it could be used on any message,
// but it's used by Spooky just for short messages.
//
void SpookyHashV1::Short(
const void *message,
size_t length,
uint64_t *hash1,
uint64_t *hash2)
{
uint64_t buf[2*sc_numVars];
union
{
const uint8_t *p8;
uint32_t *p32;
uint64_t *p64;
size_t i;
} u;
u.p8 = (const uint8_t *)message;
if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
{
memcpy(buf, message, length);
u.p64 = buf;
}
size_t remainder = length%32;
uint64_t a=*hash1;
uint64_t b=*hash2;
uint64_t c=sc_const;
uint64_t d=sc_const;
if (length > 15)
{
const uint64_t *end = u.p64 + (length/32)*4;
// handle all complete sets of 32 bytes
for (; u.p64 < end; u.p64 += 4)
{
c += u.p64[0];
d += u.p64[1];
ShortMix(a,b,c,d);
a += u.p64[2];
b += u.p64[3];
}
//Handle the case of 16+ remaining bytes.
if (remainder >= 16)
{
c += u.p64[0];
d += u.p64[1];
ShortMix(a,b,c,d);
u.p64 += 2;
remainder -= 16;
}
}
// Handle the last 0..15 bytes, and its length
d = ((uint64_t)length) << 56;
switch (remainder)
{
case 15:
d += ((uint64_t)u.p8[14]) << 48;
case 14:
d += ((uint64_t)u.p8[13]) << 40;
case 13:
d += ((uint64_t)u.p8[12]) << 32;
case 12:
d += u.p32[2];
c += u.p64[0];
break;
case 11:
d += ((uint64_t)u.p8[10]) << 16;
case 10:
d += ((uint64_t)u.p8[9]) << 8;
case 9:
d += (uint64_t)u.p8[8];
case 8:
c += u.p64[0];
break;
case 7:
c += ((uint64_t)u.p8[6]) << 48;
case 6:
c += ((uint64_t)u.p8[5]) << 40;
case 5:
c += ((uint64_t)u.p8[4]) << 32;
case 4:
c += u.p32[0];
break;
case 3:
c += ((uint64_t)u.p8[2]) << 16;
case 2:
c += ((uint64_t)u.p8[1]) << 8;
case 1:
c += (uint64_t)u.p8[0];
break;
case 0:
c += sc_const;
d += sc_const;
}
ShortEnd(a,b,c,d);
*hash1 = a;
*hash2 = b;
}
// do the whole hash in one call
void SpookyHashV1::Hash128(
const void *message,
size_t length,
uint64_t *hash1,
uint64_t *hash2)
{
if (length < sc_bufSize)
{
Short(message, length, hash1, hash2);
return;
}
uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
uint64_t buf[sc_numVars];
uint64_t *end;
union
{
const uint8_t *p8;
uint64_t *p64;
size_t i;
} u;
size_t remainder;
h0=h3=h6=h9 = *hash1;
h1=h4=h7=h10 = *hash2;
h2=h5=h8=h11 = sc_const;
u.p8 = (const uint8_t *)message;
end = u.p64 + (length/sc_blockSize)*sc_numVars;
// handle all whole sc_blockSize blocks of bytes
if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0))
{
while (u.p64 < end)
{
Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
u.p64 += sc_numVars;
}
}
else
{
while (u.p64 < end)
{
memcpy(buf, u.p64, sc_blockSize);
Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
u.p64 += sc_numVars;
}
}
// handle the last partial block of sc_blockSize bytes
remainder = (length - ((const uint8_t *)end-(const uint8_t *)message));
memcpy(buf, end, remainder);
memset(((uint8_t *)buf)+remainder, 0, sc_blockSize-remainder);
((uint8_t *)buf)[sc_blockSize-1] = remainder;
Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
// do some final mixing
End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
*hash1 = h0;
*hash2 = h1;
}
// init spooky state
void SpookyHashV1::Init(uint64_t seed1, uint64_t seed2)
{
m_length = 0;
m_remainder = 0;
m_state[0] = seed1;
m_state[1] = seed2;
}
// add a message fragment to the state
void SpookyHashV1::Update(const void *message, size_t length)
{
uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
size_t newLength = length + m_remainder;
uint8_t remainder;
union
{
const uint8_t *p8;
uint64_t *p64;
size_t i;
} u;
const uint64_t *end;
// Is this message fragment too short? If it is, stuff it away.
if (newLength < sc_bufSize)
{
memcpy(&((uint8_t *)m_data)[m_remainder], message, length);
m_length = length + m_length;
m_remainder = (uint8_t)newLength;
return;
}
// init the variables
if (m_length < sc_bufSize)
{
h0=h3=h6=h9 = m_state[0];
h1=h4=h7=h10 = m_state[1];
h2=h5=h8=h11 = sc_const;
}
else
{
h0 = m_state[0];
h1 = m_state[1];
h2 = m_state[2];
h3 = m_state[3];
h4 = m_state[4];
h5 = m_state[5];
h6 = m_state[6];
h7 = m_state[7];
h8 = m_state[8];
h9 = m_state[9];
h10 = m_state[10];
h11 = m_state[11];
}
m_length = length + m_length;
// if we've got anything stuffed away, use it now
if (m_remainder)
{
uint8_t prefix = sc_bufSize-m_remainder;
memcpy(&(((uint8_t *)m_data)[m_remainder]), message, prefix);
u.p64 = m_data;
Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
u.p8 = ((const uint8_t *)message) + prefix;
length -= prefix;
}
else
{
u.p8 = (const uint8_t *)message;
}
// handle all whole blocks of sc_blockSize bytes
end = u.p64 + (length/sc_blockSize)*sc_numVars;
remainder = (uint8_t)(length-((const uint8_t *)end-u.p8));
if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0)
{
while (u.p64 < end)
{
Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
u.p64 += sc_numVars;
}
}
else
{
while (u.p64 < end)
{
memcpy(m_data, u.p8, sc_blockSize);
Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
u.p64 += sc_numVars;
}
}
// stuff away the last few bytes
m_remainder = remainder;
memcpy(m_data, end, remainder);
// stuff away the variables
m_state[0] = h0;
m_state[1] = h1;
m_state[2] = h2;
m_state[3] = h3;
m_state[4] = h4;
m_state[5] = h5;
m_state[6] = h6;
m_state[7] = h7;
m_state[8] = h8;
m_state[9] = h9;
m_state[10] = h10;
m_state[11] = h11;
}
// report the hash for the concatenation of all message fragments so far
void SpookyHashV1::Final(uint64_t *hash1, uint64_t *hash2)
{
// init the variables
if (m_length < sc_bufSize)
{
*hash1 = m_state[0];
*hash2 = m_state[1];
Short( m_data, m_length, hash1, hash2);
return;
}
const uint64_t *data = (const uint64_t *)m_data;
uint8_t remainder = m_remainder;
uint64_t h0 = m_state[0];
uint64_t h1 = m_state[1];
uint64_t h2 = m_state[2];
uint64_t h3 = m_state[3];
uint64_t h4 = m_state[4];
uint64_t h5 = m_state[5];
uint64_t h6 = m_state[6];
uint64_t h7 = m_state[7];
uint64_t h8 = m_state[8];
uint64_t h9 = m_state[9];
uint64_t h10 = m_state[10];
uint64_t h11 = m_state[11];
if (remainder >= sc_blockSize)
{
// m_data can contain two blocks; handle any whole first block
Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
data += sc_numVars;
remainder -= sc_blockSize;
}
// mix in the last partial block, and the length mod sc_blockSize
memset(&((uint8_t *)data)[remainder], 0, (sc_blockSize-remainder));
((uint8_t *)data)[sc_blockSize-1] = remainder;
Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
// do some final mixing
End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
*hash1 = h0;
*hash2 = h1;
}
} // namespace hash
} // namespace folly