PDA

View Full Version : Fast hash functions??



chronozphere
10-11-2010, 06:51 PM
Hey guys

I did an attempt to port this hash function (http://www.azillionmonkeys.com/qed/hash.html) to pascal. Here's the result:



function getWord(const data: PChar): Word;
begin
Result := PWord( data )^;
end;

function SuperFastHash(const str: String): Cardinal;
var
data: PChar;
hash, tmp: Cardinal;
rem, len: Integer;
begin
Result := 0;
if str = '' then Exit;
hash := len;

data := PChar(str);
len := Length(str);

rem := len and 3;
len := len shr 2;

//Main loop
while len > 0 do
begin
hash := hash + getWord( data );
tmp := (getWord( data+2 ) shl 11) xor hash;
hash := (hash shl 16) xor tmp;
data := data + 2 * sizeof(Word);
hash := hash + (hash shr 11);
dec(len);
end;

//Handle end cases
case rem of
3: begin
hash := hash + getWord(data);
hash := hash xor (hash shl 16);
hash := hash xor (byte(data[ sizeof(Word) ]) shl 18);
hash := hash + (hash shr 11);
end;
2: begin
hash := hash + getWord(data);
hash := hash xor (hash shl 11);
hash := hash + (hash shr 17);
end;
1: begin
hash := hash + PByte(data)^;
hash := hash xor (hash shl 10);
hash := hash + (hash shr 1);
end;
end;

//Force avalanching of final 127 bits
hash := hash xor (hash shl 3);
hash := hash + (hash shr 5);
hash := hash xor (hash shl 4);
hash := hash + (hash shr 17);
hash := hash xor (hash shl 25);
hash := hash + (hash shr 6);

Result := hash;
end;


You can test it with the following code:



//Benchmark routine
procedure TForm1.Button2Click(Sender: TObject);
var
I, J: Integer;
T1, T2, F: Int64;
ms: Single;
Strings: array [0..2] of string;
const
NUMBER = 5000000;
begin
for I := 0 to 2 do
for J := 0 to 255 do
Strings[I] := Strings[I] + Char(Random(255));

Randomize();
QueryPerformanceCounter(T1);

for I := 0 to NUMBER do
SuperFastHash( Strings[ ((I mod 5)*(I mod 8)+2) mod 3 ] );

QueryPerformanceCounter(T2);
QueryPerformanceFrequency(F);
ms := ((T2 - T1) / F) * 1000;
ShowMessage(IntToStr(NUMBER)+' executions in: '+Format('%3.3n',[ms])+'ms')
end;

//Tests whether the function is deterministic (allways returns the same output for the same input)
procedure TForm1.Button3Click(Sender: TObject);
var
I, J: Integer;
H1, H2: Cardinal;
Data: String;
begin
for I := 0 to 1000 do
begin
Data := '';
for J := 0 to 255 do
Data := Data + Char(Random(255));

Assert( SuperFastHash( Data ) = SuperFastHash( Data ) );
end;
ShowMessage('Test passed!');
end;


So far it seems to work quite good. The benchmark function tells me that it can execute 5.000.000 times in 1.77 seconds on my 2.8Ghz machine.

I do have three questions:

The function seems to work. However, I don't know if I made a mistake somewhere during the translation. Could someone take a brief look and tell me if my type-casts are OK?? This might not be neccesary, but I'd still appreciate it.

Also, do you guys know other fast hashing algorithms? Maybe it would be fun to compare this one to others, to see which is fastest. ;)

I would like to use a fast hashing function as a base for a portable HashMap class (that also works in delphi). I guess I should just create an array with size N and do "mod N" on the result returned by the hash-function. Is that a good way to implement it?

Thanks!

User137
10-11-2010, 11:34 PM
At the beginning variable hash is not initialized, because at that point len has no value. Variables that are defined in a procedure are not automatically reset to 0, they can have any imaginary random value.

hash := len;

chronozphere
11-11-2010, 12:05 AM
Thanks for pointing that out. ;)

Jimmy Valavanis
12-11-2010, 11:45 AM
Here is a version with very little changes that runs at least twice as faster (compiled with Delphi 7)



program testhash;

{$APPTYPE CONSOLE}


uses Windows, SysUtils;


function getWord(const data: PChar): Word;
begin
Result := PWord( data )^;
end;

function SuperFastHash(const str: String): Cardinal;
var
data: PChar;
hash, tmp: Cardinal;
rem, len: Integer;
begin
Result := 0;
if str = '' then Exit;

len := Length(str);
hash := len;

data := PChar(str);

rem := len and 3;
len := len shr 2;

//Main loop
while len > 0 do
begin
hash := hash + getWord( data );
tmp := (getWord( data+2 ) shl 11) xor hash;
hash := (hash shl 16) xor tmp;
data := data + 2 * sizeof(Word);
hash := hash + (hash shr 11);
dec(len);
end;

//Handle end cases
case rem of
3: begin
hash := hash + getWord(data);
hash := hash xor (hash shl 16);
hash := hash xor (byte(data[ sizeof(Word) ]) shl 18);
hash := hash + (hash shr 11);
end;
2: begin
hash := hash + getWord(data);
hash := hash xor (hash shl 11);
hash := hash + (hash shr 17);
end;
1: begin
hash := hash + PByte(data)^;
hash := hash xor (hash shl 10);
hash := hash + (hash shr 1);
end;
end;

//Force avalanching of final 127 bits
hash := hash xor (hash shl 3);
hash := hash + (hash shr 5);
hash := hash xor (hash shl 4);
hash := hash + (hash shr 17);
hash := hash xor (hash shl 25);
hash := hash + (hash shr 6);

Result := hash;
end;



function SuperFastHash2(const str: String): Cardinal;
var
data: PChar;
hash, tmp: Cardinal;
rem, len: Integer;
begin
Result := 0;
if str = '' then Exit;
len := Length(str);
hash := len;

data := @str[1];

rem := len and 3;
len := len shr 2;

//Main loop
while len > 0 do
begin
hash := hash + PWord( data )^;
inc(data, 2);
tmp := (PWord( data )^ shl 11) xor hash;
hash := (hash shl 16) xor tmp;
inc(data, 2);
hash := hash + (hash shr 11);
dec(len);
end;

//Handle end cases
case rem of
3: begin
hash := hash + PWord( data )^;
hash := hash xor (hash shl 16);
hash := hash xor (byte(data[ sizeof(Word) ]) shl 18);
hash := hash + (hash shr 11);
end;
2: begin
hash := hash + PWord( data )^;
hash := hash xor (hash shl 11);
hash := hash + (hash shr 17);
end;
1: begin
hash := hash + PByte(data)^;
hash := hash xor (hash shl 10);
hash := hash + (hash shr 1);
end;
end;

//Force avalanching of final 127 bits
hash := hash xor (hash shl 3);
hash := hash + (hash shr 5);
hash := hash xor (hash shl 4);
hash := hash + (hash shr 17);
hash := hash xor (hash shl 25);
hash := hash + (hash shr 6);

Result := hash;
end;

var
I, J: Integer;
T1, T2, F: Int64;
ms: Single;
Strings: array [0..2] of string;
const
NUMBER = 5000000;
begin
for I := 0 to 2 do
for J := 0 to 255 do
Strings[I] := Strings[I] + Char(Random(255));

Randomize();


writeln('Strarting 1st method');

QueryPerformanceCounter(T1);

for I := 0 to NUMBER do
SuperFastHash( Strings[ ((I mod 5)*(I mod 8)+2) mod 3 ] );

QueryPerformanceCounter(T2);
QueryPerformanceFrequency(F);
ms := ((T2 - T1) / F) * 1000;
writeln('Finished 1st method');

writeln(IntToStr(NUMBER)+' executions in: '+Format('%3.3n',[ms])+'ms');

//-----------------------------------------------------------------

writeln('Strarting 2st method');

QueryPerformanceCounter(T1);

for I := 0 to NUMBER do
SuperFastHash2( Strings[ ((I mod 5)*(I mod 8)+2) mod 3 ] );

QueryPerformanceCounter(T2);
QueryPerformanceFrequency(F);
ms := ((T2 - T1) / F) * 1000;
writeln('Finished 2st method');

writeln(IntToStr(NUMBER)+' executions in: '+Format('%3.3n',[ms])+'ms');

readln;
end.


First method uses the function you posted, the second method has some minor changes (just get rid of getWord function)

chronozphere
14-11-2010, 01:08 AM
Hah great. Thanks for that one. :)

I was actually looking for a fast string hashing function, but I realize now that this is not really suitable. I'm talking about short < 32char strings.
Does anyone know a good/fast hash for those?

Thanks! ;)