In one of my recent tasks I had to work with text files and extract rows based on certain values contained in a specific column.
I had a 4 GB file and I had to parse that file to extract rows that matched certain keywords.The total keyword matches varied from 10000 to less than a million.
For a smaller set of keywords I have used List but noticed that as the list items grew in size the lookup operation became slower.Below is a test I did to compare the lookup performance between a List<string> and HashSet<string>
HashSet<string> lookup is faster and should be used for lookup instead of a list.
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace HashSetListCompare
{
class Program
{
static void Main(string[] args)
{
try
{
string s = "512367";
List<string> sNumbersList = new List<string>();
HashSet<string> sNumbersHashSet = new HashSet<string>();
//List load performance
Stopwatch oListLoad = new Stopwatch();
oListLoad.Start();
for (int i = 0; i < 1000000; i++)
sNumbersList.Add(i.ToString());
oListLoad.Stop();
Console.WriteLine("List load time:" + oListLoad.Elapsed);
//Hash Set load performance
Stopwatch oHashSetLoad = new Stopwatch();
oHashSetLoad.Start();
for (int i = 0; i < 1000000; i++)
sNumbersHashSet.Add(i.ToString());
oHashSetLoad.Stop();
Console.WriteLine("Hash load time:" + oHashSetLoad.Elapsed);
//List lookup performance
Stopwatch oListWatch = new Stopwatch();
oListWatch.Start();
if(sNumbersList.Contains(s))
Console.Write("List lookup time: ");
oListWatch.Stop();
Console.WriteLine(oListWatch.Elapsed);
//Hash set lookup performance
Stopwatch oHashSetWatch = new Stopwatch();
oHashSetWatch.Start();
if(sNumbersHashSet.Contains(s))
Console.Write("Hash lookup time: ");
oHashSetWatch.Stop();
Console.WriteLine(oHashSetWatch.Elapsed);
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
Console.WriteLine(ex.StackTrace);
}
Console.WriteLine("Done");
Console.Read();
}
}
}