For one of my projects I wanted to take all the files in a folder and merge every 100 of them into a single file, producing a smaller set of files of roughly similar sizes. I had around 30 thousand files, which were merged into about 300 files. I used a custom partitioner in TPL (Task Parallel Library) to do this. Below is the sample code that I started with.
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace CustomPartitioner
{
    class Program
    {
        static void Main(string[] args)
        {
            try
            {
                int iFileNumber = 0;
                string sInputFolderPath = ConfigurationManager.AppSettings["INPUT_FOLDER_PATH"];
                string[] sFileList = Directory.GetFiles(sInputFolderPath, "*.gz");

                // 100 is the merge size: each range handed to a worker covers up to 100 files.
                var rangePartitioner = Partitioner.Create(0, sFileList.Length, 100);

                Parallel.ForEach(rangePartitioner, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }, (range, loopState) =>
                {
                    try
                    {
                        // One output file per range; Interlocked.Increment keeps the file counter thread-safe.
                        using (StreamWriter oWriter = new StreamWriter(@"c:\Index_" + Interlocked.Increment(ref iFileNumber) + ".txt"))
                        {
                            for (int iIndex = range.Item1; iIndex < range.Item2; iIndex++)
                            {
                                try
                                {
                                    // Write the source file name and its index for this range.
                                    oWriter.WriteLine(string.Format("{0}\t{1}", sFileList[iIndex], iIndex));
                                }
                                catch (Exception ex)
                                {
                                    Console.WriteLine(ex.Message);
                                }
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine(ex.Message);
                        Console.WriteLine(ex.InnerException);
                        Console.WriteLine(ex.StackTrace);
                    }
                });
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.InnerException);
                Console.WriteLine(ex.StackTrace);
            }

            Console.WriteLine("Done !!!");
            Console.Read();
        }
    }
}
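The index-writing loop above is only a placeholder for the real merge step. As a rough sketch of how the same range-partitioner shape can perform the actual merge, the version below copies the raw bytes of every .gz file in a range into one output file per range (concatenated gzip members still form a readable gzip stream, which is usually enough when the consumer just needs all the records). The OUTPUT_FOLDER_PATH setting and the Merged_ file-name prefix are placeholders I introduced for illustration; adjust them to your environment, and decompress and re-compress instead if your downstream tooling cannot handle multi-member gzip files.

using System;
using System.Collections.Concurrent;
using System.Configuration;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

namespace CustomPartitioner
{
    class Program
    {
        static void Main(string[] args)
        {
            int iFileNumber = 0;
            string sInputFolderPath = ConfigurationManager.AppSettings["INPUT_FOLDER_PATH"];
            // Hypothetical setting for where the merged files should land.
            string sOutputFolderPath = ConfigurationManager.AppSettings["OUTPUT_FOLDER_PATH"];
            string[] sFileList = Directory.GetFiles(sInputFolderPath, "*.gz");

            // Each range covers up to 100 source files, so ~30,000 inputs become ~300 outputs.
            var rangePartitioner = Partitioner.Create(0, sFileList.Length, 100);

            Parallel.ForEach(rangePartitioner,
                new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount },
                range =>
                {
                    // One output file per range; Interlocked keeps the counter thread-safe.
                    string sOutputFile = Path.Combine(sOutputFolderPath,
                        "Merged_" + Interlocked.Increment(ref iFileNumber) + ".gz");

                    using (FileStream oOutput = File.Create(sOutputFile))
                    {
                        for (int iIndex = range.Item1; iIndex < range.Item2; iIndex++)
                        {
                            // Append the raw bytes of each source .gz file.
                            // Concatenated gzip members are still a valid gzip stream.
                            using (FileStream oInput = File.OpenRead(sFileList[iIndex]))
                            {
                                oInput.CopyTo(oOutput);
                            }
                        }
                    }
                });

            Console.WriteLine("Done !!!");
            Console.Read();
        }
    }
}

Note that the last range handed out by Partitioner.Create may contain fewer than 100 files when the total count is not an exact multiple of the merge size, so the final merged file will simply be smaller.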