Friday, August 28, 2015

c# - Hashset vs List performance

In one of my recent tasks I had to work with text files and extract rows based on certain values contained in a specific column.


I had a 4 GB file and I had to parse that file to extract rows that matched certain keywords.The total keyword matches varied from 10000 to less than a million.

For a smaller set of keywords I have used List but noticed that as the list items grew in size the lookup  operation became slower.Below is a test I did to compare the lookup performance between a List<string> and HashSet<string>

HashSet<string> lookup is faster and should be used for lookup instead of a list.

using System;
using System.Collections.Generic;
using System.Diagnostics;

namespace HashSetListCompare
{
    class Program
    {
        static void Main(string[] args)
        {
            try
            {
                string s = "512367";
                List<string> sNumbersList = new List<string>();
                HashSet<string> sNumbersHashSet = new HashSet<string>();

                //List load performance
                Stopwatch oListLoad = new Stopwatch();
                oListLoad.Start();
                for (int i = 0; i < 1000000; i++)
                    sNumbersList.Add(i.ToString());
                oListLoad.Stop();
                Console.WriteLine("List load time:" + oListLoad.Elapsed);

                //Hash Set load performance
                Stopwatch oHashSetLoad = new Stopwatch();
                oHashSetLoad.Start();
                for (int i = 0; i < 1000000; i++)
                    sNumbersHashSet.Add(i.ToString());
                oHashSetLoad.Stop();
                Console.WriteLine("Hash load time:" + oHashSetLoad.Elapsed);

                //List lookup performance
                Stopwatch oListWatch = new Stopwatch();
                oListWatch.Start();
                if(sNumbersList.Contains(s))
                    Console.Write("List lookup time: ");
                oListWatch.Stop();
                Console.WriteLine(oListWatch.Elapsed);

                //Hash set lookup performance
                Stopwatch oHashSetWatch = new Stopwatch();
                oHashSetWatch.Start();
                if(sNumbersHashSet.Contains(s))
                    Console.Write("Hash lookup time: ");
                oHashSetWatch.Stop();
                Console.WriteLine(oHashSetWatch.Elapsed);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
            Console.WriteLine("Done");
            Console.Read();
        }
    }
}


No comments: