对于非常小的馆藏,差异将可忽略不计。如果您要进行大量查找,则在范围的低端(500k项),您会开始发现差异。二进制搜索将为O(log n),而哈希查找将为O(1),分期偿还。这与真正的常数并不相同,但是您仍然必须拥有一个非常糟糕的哈希函数才能获得比二进制搜索更差的性能。
(当我说“可怕的哈希”时,我的意思是:
hashCode()
{
return 0;
}
是的,它本身速度很快,但是会导致您的哈希映射成为链接列表。)
ialiashkevich使用数组和Dictionary编写了一些C#代码,以比较这两种方法,但是它使用Long值作为键。我想测试在查找过程中实际执行哈希函数的内容,因此我修改了该代码。我将其更改为使用String值,并且将填充和查找部分重构为它们自己的方法,因此更容易在事件探查器中查看。作为比较,我还留了使用Long值的代码。最后,我摆脱了自定义二进制搜索功能,并在Array
类中使用了该功能。
这是代码:
class Program
{
private const long capacity = 10_000_000;
private static void Main(string[] args)
{
testLongValues();
Console.WriteLine();
testStringValues();
Console.ReadLine();
}
private static void testStringValues()
{
Dictionary<String, String> dict = new Dictionary<String, String>();
String[] arr = new String[capacity];
Stopwatch stopwatch = new Stopwatch();
Console.WriteLine("" + capacity + " String values...");
stopwatch.Start();
populateStringArray(arr);
stopwatch.Stop();
Console.WriteLine("Populate String Array: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
populateStringDictionary(dict, arr);
stopwatch.Stop();
Console.WriteLine("Populate String Dictionary: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
Array.Sort(arr);
stopwatch.Stop();
Console.WriteLine("Sort String Array: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
searchStringDictionary(dict, arr);
stopwatch.Stop();
Console.WriteLine("Search String Dictionary: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
searchStringArray(arr);
stopwatch.Stop();
Console.WriteLine("Search String Array: " + stopwatch.ElapsedMilliseconds);
}
/* Populate an array with random values. */
private static void populateStringArray(String[] arr)
{
for (long i = 0; i < capacity; i++)
{
arr[i] = generateRandomString(20) + i; // concatenate i to guarantee uniqueness
}
}
/* Populate a dictionary with values from an array. */
private static void populateStringDictionary(Dictionary<String, String> dict, String[] arr)
{
for (long i = 0; i < capacity; i++)
{
dict.Add(arr[i], arr[i]);
}
}
/* Search a Dictionary for each value in an array. */
private static void searchStringDictionary(Dictionary<String, String> dict, String[] arr)
{
for (long i = 0; i < capacity; i++)
{
String value = dict[arr[i]];
}
}
/* Do a binary search for each value in an array. */
private static void searchStringArray(String[] arr)
{
for (long i = 0; i < capacity; i++)
{
int index = Array.BinarySearch(arr, arr[i]);
}
}
private static void testLongValues()
{
Dictionary<long, long> dict = new Dictionary<long, long>(Int16.MaxValue);
long[] arr = new long[capacity];
Stopwatch stopwatch = new Stopwatch();
Console.WriteLine("" + capacity + " Long values...");
stopwatch.Start();
populateLongDictionary(dict);
stopwatch.Stop();
Console.WriteLine("Populate Long Dictionary: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
populateLongArray(arr);
stopwatch.Stop();
Console.WriteLine("Populate Long Array: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
searchLongDictionary(dict);
stopwatch.Stop();
Console.WriteLine("Search Long Dictionary: " + stopwatch.ElapsedMilliseconds);
stopwatch.Reset();
stopwatch.Start();
searchLongArray(arr);
stopwatch.Stop();
Console.WriteLine("Search Long Array: " + stopwatch.ElapsedMilliseconds);
}
/* Populate an array with long values. */
private static void populateLongArray(long[] arr)
{
for (long i = 0; i < capacity; i++)
{
arr[i] = i;
}
}
/* Populate a dictionary with long key/value pairs. */
private static void populateLongDictionary(Dictionary<long, long> dict)
{
for (long i = 0; i < capacity; i++)
{
dict.Add(i, i);
}
}
/* Search a Dictionary for each value in a range. */
private static void searchLongDictionary(Dictionary<long, long> dict)
{
for (long i = 0; i < capacity; i++)
{
long value = dict[i];
}
}
/* Do a binary search for each value in an array. */
private static void searchLongArray(long[] arr)
{
for (long i = 0; i < capacity; i++)
{
int index = Array.BinarySearch(arr, arr[i]);
}
}
/**
* Generate a random string of a given length.
* Implementation from https://stackoverflow.com/a/1344258/1288
*/
private static String generateRandomString(int length)
{
var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
var stringChars = new char[length];
var random = new Random();
for (int i = 0; i < stringChars.Length; i++)
{
stringChars[i] = chars[random.Next(chars.Length)];
}
return new String(stringChars);
}
}
以下是几种不同大小的集合的结果。(时间以毫秒为单位。)
500000长值...
填充长字典:26
填充长数组:2
搜索长字典:9
搜索长数组:80
500000字符串值...
填充字符串数组:1237
填充字符串字典:46
排序字符串数组:1755
搜索字符串字典:27
搜索字符串数组:1569
1000000长值...
填充长字典:58
填充长数组:5
搜索长字典:23
搜索长数组:136
1000000字符串值...
填充字符串数组:2070
填充字符串字典:121
排序字符串数组:3579
搜索字符串字典:58
搜索字符串数组:3267
3000000长值...
填充长字典:207
填充长数组:14
搜索长字典:75
搜索长数组:435
3000000字符串值...
填充字符串数组:5553
填充字符串字典:449
排序字符串数组:11695
搜索字符串字典:194
搜索字符串数组:10594
10000000长值...
填充长字典:521
填充长数组:47
搜索长字典:202
搜索长数组:1181
10000000字符串值...
填充字符串数组:18119
填充字符串字典:1088
排序字符串数组:28174
搜索字符串字典:747
搜索字符串数组:26503
为了进行比较,以下是该程序的最后一次运行的探查器输出(1000万个记录和查找)。我重点介绍了相关功能。他们与上述秒表计时指标非常吻合。
您可以看到字典查找比二进制搜索快得多,并且(如预期的那样)集合越大,差异就越明显。因此,如果您具有合理的散列功能(相当快且几乎没有冲突),则散列查找应胜过二进制搜索此范围内的集合。