Monday, December 15, 2014

SQL - Iterate,loop through a string

Recently I had to update a table based on a file which had some counts in it.After loading that file into a table we found that it had 25 Million plus rows in it.While we tried to run an update using the entire table we ended up getting timeouts and the table was getting locked.So we decided to do the update using smaller sets of rows.
The subset was decided on the first character of the Id column.In our case it fell into one of the following characters.

abcdefghijklmnopqrstuvwxyz0123456789

So I created a stored procedure that would take the first character of the Id column and load it into a temp table.Then I ran the update command using this subset of rows in the temp table.I repeated this for all the characters in the above string.

Below is the SQL to achieve this.

declare @id_string varchar(200)
declare @index int
declare @string_length int
declare @char char(1)

set @id_string = 'abcdefghijklmnopqrstuvwxyz0123456789'

set @index = 1
set @string_length= LEN(@id_string )

while @index<= @string_length
begin

set @char = SUBSTRING(@id_string , @index, 1)
--print @char
exec mystoredprocedure @char
set @index= @index+ 1
end

Friday, December 12, 2014

PIG - GroupBy, Count total records,total distinct records,min and max date

Syntactically Group By in PIG is a little different than SQL Group By.I wanted to get some counts from 500+ logs files so I decided to get the counts in PIG, Hadoop.I copied all the log files to HDFS.The files had two columns and were tab delimited.id and timestamp with format(YYYYMMDD).

Below is the script to get the counts using group by in PIG Script.Once the data is grouped by id,getting the totalcounts,min and max is fairly straightforward but for getting distinct counts I had to use a nested FOREACH. In the nested FOREACH, I distinct out the timestamps since I have multiple records in a given day and then use the COUNT operator on the distinct data.

rawdata = LOAD '/myfolder/my_logs/2014*.txt' USING PigStorage('\t') AS (id:chararray,postdate:chararray);

groupdata = GROUP rawdata BY id;
idcountdata = FOREACH groupdata 
                     {
distinctrawdata = DISTINCT rawdata;
GENERATE group as id,COUNT(rawdata) as totalcount,COUNT(distinctrawdata ) as distinctcount,MIN(rawdata .postdate) as mindate,MAX(rawdata .postdate) as maxdate;
     };

store idcountdata INTO '/myoutputfolder/idcounts/' USING PigStorage('\t');

Tuesday, December 9, 2014

iis - Post size exceeded the allowed limits

After hosting our appliction on IIS we started noticing a lot of warnings about the post size in the event viewer.


Once we tweaked the settings for the request length most of the warnings vanished.Below are the screenshots of the warnings,settings that was tweaked on IIS to handle post size from the client and the difference in the files received after the change in settings.










The default post size an application hosted on IIS can handle is 4 MB.After changing this setting to accept post size up to 20 MB in the web.config file for the application, we started receiving posts that were rejected before.


<system.web>
        <httpRuntime maxRequestLength="20480" />
</system.web>




















Monday, December 8, 2014

c# - Create child processes in parallel and wait till all child processes terminate

In one of my projects I had to invoke an external exe in parallel for carrying out simple command to execute a script in Hadoop. The parent process had to wait till all the child processes would complete execution on HDFS and then would carry on other steps.

Below is a simple example to achieve the same.In this example I have created an exe to write to a file i.e. SimpleWrite.exe. I call this exe in parallel (5 instances in parallel) and pass in an argument.The parent process that calls this exe waits till all instances of SimpleWrite completes and then continues its steps.


Program to call external exes in Parallel

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ProcessWait

{
    class Program
    {
        static void Main(string[] args)
        {
            try
            {

                Console.WriteLine("Parent process started executing...");

                Console.WriteLine("Parent process is starting child processes...");
                Parallel.For(0, 5, iIndex =>
                {
                    Console.WriteLine("Child process {0} started",iIndex);
                    NewProcess(iIndex);
                });
                Console.WriteLine("All child processes completed.Waiting for user to press the enter key !!!");
                Console.Read();
                Console.WriteLine("Parent process to continue execution.");
                for (int iIndex = 0; iIndex < 10000; iIndex++)
                {
                    Console.WriteLine(iIndex.ToString());
                }
                Console.WriteLine("Parent process completed execution.");
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
            Console.Read();
        }

        private static void NewProcess(int iIndex)

        {
            Process oRunIndex = new Process();
            oRunIndex.StartInfo.FileName = "SimpleWrite.exe";
            oRunIndex.StartInfo.WorkingDirectory = @"c:\Data\";
            oRunIndex.StartInfo.CreateNoWindow = true;
            oRunIndex.StartInfo.Arguments = string.Format("{0}",iIndex);
            
            oRunIndex.Start();
            oRunIndex.WaitForExit();
        }
    }

}



SimpleWrite.exe code

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

namespace SimpleWrite

{
    class Program
    {
        static void Main(string[] args)
        {
            try
            {
                string sIndex = args[0];
                using(StreamWriter oWriter = new StreamWriter(@"c:\Data\" + sIndex +".txt"))
                {
                    for (int iIndex = 0; iIndex < 100000; iIndex++)
                    {
                        Console.WriteLine(iIndex.ToString());
                        oWriter.WriteLine(iIndex.ToString());
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.Read();
            }
        }
    }
}


Output

















Friday, November 21, 2014

c# - Using Custom Partition in Task Parallel Library TPL

For one of my project I wanted to take all the files in a folder and merge 100 files together into 1 file resulting in a smaller set of files more or else of similar sizes.I had around 30 thousand files which are merged to 300 files.I used custom partitioner in TPL to do this.Below is the sample code that I started with.

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace CustomPartitioner

{
    class Program
    {
        static void Main(string[] args)
        {
            try
            {
                int iFileNumber = 0;
                string sInputFolderPath =ConfigurationManager.AppSettings["INPUT_FOLDER_PATH"];
                string[] sFileList = Directory.GetFiles(sInputFolderPath, "*.gz");
               
                var rangePartitioner = Partitioner.Create(0, sFileList.Length, 100);//100 is the merge size
               
                Parallel.ForEach(rangePartitioner, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }, (range, loopState) =>
               {
                        try
                       {
                                using (StreamWriter oWriter = new StreamWriter(@"c:\Index_"+ Interlocked.Increment(ref iFileNumber)+".txt"))
                               {
                                    for (int iIndex = range.Item1; iIndex < range.Item2; iIndex++)
                                   {
                                        try
                                       {
                                               oWriter.WriteLine(string.Format("{0}\t{1}", sFileList[iIndex], iIndex));
                                        }
                                        catch (Exception ex)
                                       {
                                            Console.WriteLine(ex.Message);
                                        }
                                    }
                                }
                            }
                            catch (Exception ex)
                            {
                                Console.WriteLine(ex.Message);
                                Console.WriteLine(ex.InnerException);
                                Console.WriteLine(ex.StackTrace);
                            }
                 });           
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.InnerException);
                Console.WriteLine(ex.StackTrace);
            }
            Console.WriteLine("Done !!!");
            Console.Read();
        }
    }
}

c# - Load large text files to database

Two ways to load a large text files to a table in the database are using
  • bcp command with xp_cmdShell enabled on the database server
  • bulk insert
The files I load are in the range of 10-15 GB tab delimited text files.These files are created by daily processes and are stored on file servers running Windows Server 2008.After the files are created they are loaded to SQL Server Database.

bcp command with xp_cmdShell


using System;
using System.Text;
using System.Configuration;
using System.IO;
using System.Data.SqlClient;

public class LoadLargeFileToDB


          static void Main()
          {
                 string sConnectionString = "server=MyServer;database=MyDatabase;uid='username';pwd='password'";
                 string sServer = "MyServer";
                 string sTableName = "MyTable";
                 string sImportFilePath = @"\\Servername\DriveName\SharedFolder\FileName.txt";


                 try
                 {
                      if (!File.Exists(sImportFilePath))
                              throw new Exception("Error importing BCP file = " + sImportFilePath + ". File does not exists.");
                                     
                      using (SqlConnection oSqlConnection = new SqlConnection(sConnectionString))
                      {
                             oSqlConnection.Open();
                             SqlCommand oSqlCommand = new SqlCommand("exec xp_cmdShell 'bcp " + sTableName + " in " + sImportFilePath + " -c -T -S " + sServer + "'", oSqlConnection);
                             oSqlCommand.CommandTimeout = 120000;
                             oSqlCommand.ExecuteNonQuery();
                      }
                }
                catch (Exception ex)
                {
                      Console.Error(ex.Message);
                      Console.Error(ex.InnerException);
                      Console.Error(ex.StackTrace);
                }
         }
}


Bulk insert

using System;
using System.Text;
using System.Configuration;
using System.IO;
using System.Data.SqlClient;

public class LoadLargeFileToDB


          static void Main()
          {
                 string sConnectionString = "server=MyServer;database=MyDatabase;uid='username';pwd='password'";
                 string sServer = "MyServer";
                 string sTableName = "MyTable";
                 string sImportFilePath = @"\\Servername\DriveName\SharedFolder\FileName.txt";


                 try
                 {
                      if (!File.Exists(sImportFilePath))
                              throw new Exception("Error importing BCP file = " + sImportFilePath + ". File does not exists.");
                                     
                      using (SqlConnection oSqlConnection = new SqlConnection(sConnectionString))
                      {
                             oSqlConnection.Open();
                             SqlCommand oSqlCommand = new SqlCommand("bulk insert "+ sTableName +" from '"+ sImportFilePath +"'";, oSqlConnection);
                             oSqlCommand.CommandTimeout = 120000;
                             oSqlCommand.ExecuteNonQuery();
                      }
                }
                catch (Exception ex)
                {
                      Console.Error(ex.Message);
                      Console.Error(ex.InnerException);
                      Console.Error(ex.StackTrace);
                }
         }
}

Thursday, November 6, 2014

c# - Send alerts when database table row value is changed

Requirement:
Send an alert when any column value in a given list of rows in a table changes.

There are different ways to build an alert system when a row's column value changes but in this case I am using C# to write a program that tracks these changes and send an email report with the changes.

The C# program is scheduled to run every minute to check whether the row value changed.

Few things to note.The table has primary key so all rows are unique.The table rows do not change often  and the size of the table is small with only a few thousand rows.Also we want to only track a subset of rows in the table.


Step 1: Create a file or table that holds the rows from the original table that will be tracked.The primary key list that will be tracked is stored in the configuration file.In this case I will be using a file to store these rows.


Step 2: Extract the rows based on the primary key list in configuration file from the original table and compare with the rows in the file or table created in the Step 1.


Step 3: If the column values are different add the primary key,column name,old value,new value to the report.


Step 4: Send an email with the report created in Step 3


Step 5: If the primary key count in the configuration file has changed or does not match with the rows in the file created in Step 1,then overwrite the file with the new list of rows that will be tracked.


Below is the entire code


using System;
using System.Collections.Generic;
using System.Configuration;
using System.Data;
using System.Data.SqlClient;
using System.IO;
using System.Linq;
using System.Net.Mail;
using System.Text;
using NLog;

namespace MyCompany.MyDept.MyProject.TableRowEditAlert

{
    class Program
    {
        static void Main(string[] args)
        {
            Logger oLog = LogManager.GetCurrentClassLogger();
            try
            {
                string sConnectionString = ConfigurationManager.AppSettings["MY_DB"];
                string sIdList = ConfigurationManager.AppSettings["ID_LIST"];
                string sFilePath = ConfigurationManager.AppSettings["FILE_PATH"];

                DataSet ds = GetIdDetails(sConnectionString, sIdList);

                if (ds != null)
                {
                    if (!File.Exists(sFilePath))
                    {
                        CreateIdDetailsFile(sFilePath, ds,true);
                    }
                    else
                    {
                        string [] sFileLines = File.ReadAllLines(sFilePath);
                        if ((sFileLines.Length - 1) != ds.Tables[0].Rows.Count)
                        {
                            CreateIdDetailsFile(sFilePath, ds, false);
                        }
                        else
                        {
                            StringBuilder sDiff = new StringBuilder();
                            string [] sCols = sFileLines[0].Split('\t');
                           
                            for (int iIndex = 1; iIndex < sFileLines.Length; iIndex++)
                            {
                                string[] sFileLineItems = sFileLines[iIndex].Split('\t');
                                for (int jIndex = 0; jIndex < sFileLineItems.Length; jIndex++)
                                {
                                    if (ds.Tables[0].Rows[iIndex -1][jIndex] != null && ds.Tables[0].Rows[iIndex-1][jIndex].ToString() != sFileLineItems[jIndex])
                                    {
                                        sDiff.Append(sFileLineItems[0]);
                                        sDiff.Append('\t');
                                        sDiff.Append(sFileLineItems[6]);
                                        sDiff.Append('\t');
                                        sDiff.Append(sFileLineItems[1]);
                                        sDiff.Append('\t');
                                        sDiff.Append(sCols[jIndex]);
                                        sDiff.Append('\t');
                                        sDiff.Append(sFileLineItems[jIndex]);
                                        sDiff.Append('\t');
                                        sDiff.Append(ds.Tables[0].Rows[iIndex-1][jIndex].ToString());
                                        sDiff.Append('\n');
                                    }
                                }
                            }
                             if (!string.IsNullOrEmpty(sDiff.ToString()))
                            {
                                SendMail(GenerateReport(sDiff.ToString()).ToString(), ConfigurationManager.AppSettings["EMAIL_SUBJECT"], true, ConfigurationManager.AppSettings["NOTIFICATION_LIST"].Split(','));

                                CreateIdDetailsFile(sFilePath, ds, false);

                            }
                        }
                    }
                }
                
            }
            catch (Exception ex)
            {
                oLog.Error(ex.Message);
                oLog.Error(ex.StackTrace);
            }
        }

        private static void CreateIdDetailsFile(string sFilePath, DataSet ds,bool bAppend)

        {
            using (StreamWriter oWriter = new StreamWriter(sFilePath,bAppend))
            {
                StringBuilder sb = new StringBuilder();
                foreach (DataColumn col in ds.Tables[0].Columns)
                {
                    sb.Append(col.ColumnName);
                    if (col.Ordinal != ds.Tables[0].Columns.Count-1)
                        sb.Append('\t');
                }
                sb.Append(Environment.NewLine);

                foreach (DataRow row in ds.Tables[0].Rows)

                {
                    foreach (DataColumn col in ds.Tables[0].Columns)
                    {
                        sb.Append(row[col] != null ? row[col].ToString() : "");
                        if(col.Ordinal != ds.Tables[0].Columns.Count - 1)
                            sb.Append('\t');
                    }
                    sb.Append(Environment.NewLine);
                }
                oWriter.Write(sb.ToString());
            }
        }

        private static DataSet GetIdDetails(string sConnectionString, string sIdList)

        {
            using (SqlConnection oConn = new SqlConnection(sConnectionString))
            {
                oConn.Open();
                SqlCommand oCmd = new SqlCommand("select * from dbo.my_table where id in (" + sIdList + ") order by id", oConn);
                oCmd.CommandType = CommandType.Text;
                SqlDataAdapter oDataAdapter = new SqlDataAdapter(oCmd);
                DataSet ds = new DataSet();
                oDataAdapter.Fill(ds);
                return ds;
            }
        }

        public static void SendMail(String message, String subject, Boolean p_bIsHtml, String[] notificationList)

        {
            try
            {
                MailMessage MyMessage = new MailMessage();
                MyMessage.From = new MailAddress(ConfigurationManager.AppSettings["EMAIL_FROM"]);
                foreach (String email in notificationList)
                    MyMessage.To.Add(new MailAddress(email));
                MyMessage.IsBodyHtml = p_bIsHtml;
                MyMessage.Subject = subject;
                MyMessage.Body = message;

                SmtpClient emailClient = new SmtpClient("smtp.office.mycompany.com");

                emailClient.Send(MyMessage);
            }
            catch
            {
            }
        }

        private static StringBuilder GenerateReport(string sDiff)

        {
            string [] sLines = sDiff.Split('\n');
            StringBuilder sbHtml = new StringBuilder();
            sbHtml.AppendLine(@"<style type='text/css'>td{font-size: 9px;font-family: verdana} </style>");
            sbHtml.AppendLine("<table><border=1><tr bgcolor=Orange align=center><td colspan=6><b>Table Row Edit Alert</b></td></tr><tr bgcolor=LightBlue align=center><td><b>ID</b></td><td><b> Name</b></td><td><b> URL</b></td><td><b>Column</b></td><td><b>Old Value</b></td><td><b>New Value</b></td></tr>");
            foreach (string  s in sLines)
            {
                if(!string.IsNullOrEmpty(s))
                {
                    string[] sLineItems = s.Split('\t');
                    sbHtml.AppendLine("<tr bgcolor=LightGreen align=center><td><b>" + sLineItems[0] + "</b></td><td><b>" + sLineItems[1] + "</b></td><td><b>" + sLineItems[2] + "</b></td><td><b>" + sLineItems[3] + "</b></td><td><b>" + sLineItems[4] + "</b></td><td><b>" + sLineItems[5] + "</b></td></tr>");
                }
            }
            sbHtml.AppendLine("</table");
            sbHtml.AppendLine("<br/>");
            sbHtml.AppendLine("<br/>");
            return sbHtml;
        }
    }
}

Configuration File

<?xml version="1.0" encoding="utf-8" ?>
<configuration>
  <appSettings>
    <add key="MY_DB" value="server=server_name;database=database_name;uid='my_id';pwd='my_password';"/>
    <add key="ID_LIST" value="100,101,102,103,500,501,501,701,703,704" />
    <add key="FILE_PATH" value="C:\Data\Id.txt"/>
    <add key="EMAIL_FROM" value="admin@mycompany.com"/>
    <add key="EMAIL_SUBJECT" value="Table Row Edit Alert"/>
    <add key="NOTIFICATION_LIST" value="my_email@mycompany.com"/>
  </appSettings>
</configuration>

Friday, October 31, 2014

c# - Read data from IPhone Backup file

There are two different types of file formats used by IPhone iOS to store data.Binary and SqLite.In this post I am reading data from a SQLite file.

Download the dll (http://system.data.sqlite.org/index.html/doc/trunk/www/downloads.wikiand add it as a reference in your project.






















Add the following using statement after adding the reference

using System.Data.SQLite;

try

{
         using (SQLiteConnection oSqlLiteConnection = new SQLiteConnection("Data Source=" + @"D:\MyBackups\MyIphoneBackupFile.sqlite"))

        {
               oSqlLiteConnection.Open();
               SQLiteCommand cmd = new SQLiteCommand("Select * from Scalars", oSqlLiteConnection);
               SQLiteDataReader dr = cmd.ExecuteReader();

               while (dr.Read())

                     Console.WriteLine(String.Format("{0}\t{1}\t{2}\t{3}", dr.GetValue(0), dr.GetValue(1), dr.GetValue(2)));
         }
}
catch(Exception ex)
{
    Console.WriteLine(ex.Message);
    Console.WriteLine(ex.Stacktrace);
}

Thursday, October 23, 2014

c# - Editing/Updating confluence wiki page using c#, Atlassian Developer WebService

Program to update the company's intranet wiki page using Atlassian Developer WebService.

To be able to do this using a c# program you would need the web reference in the project that points to the web service.Add the web service url as the web reference in your project.


Right click on your project and select Add Service Reference








































Once the service is added to the project the next step is to use the service methods to update the pages on your intranet.

Below is the entire code.You would need login,password and page id.


using System;
using System.Configuration;
using MyCompany.MyDepartment.ConfluenceWikiUpdate.MyCompany.Atlassian.Developer;

namespace MyCompany.MyDepartment.ConfluenceWikiUpdate

{
    class Program
    {
        static void Main(string[] args)
        {
            try
            {
                ConfluenceSoapServiceService oWikiUpdateService = new ConfluenceSoapServiceService();
                String sLoginToken = String.Empty;

                try

                {
                    sLoginToken = oWikiUpdateService.login(ConfigurationManager.AppSettings["CONFLUENCE_LOGIN"], ConfigurationManager.AppSettings["CONFLUENCE_PASSWD"]);
                }
                catch
                {
                    Console.WriteLine("Login attempt failed");
                }

                RemotePage oPage = oWikiUpdateService.getPage(sLoginToken, Convert.ToInt64(ConfigurationManager.AppSettings["CONFLUENCE_PAGEID"]));

                oPage.content = ConfigurationManager.AppSettings["TEXT"];
                oWikiUpdateService.updatePage(sLoginToken, oPage, new RemotePageUpdateOptions());

                oWikiUpdateService.logout(sLoginToken);


            }

            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                Console.WriteLine(ex.StackTrace);
            }
        }
    }
}

The Configuration file is below

<?xml version="1.0" encoding="utf-8" ?>

<configuration>
  <appSettings>
    <add key="CONFLUENCE_LOGIN" value="MyDepartment@MyCompany.com"/>
    <add key="CONFLUENCE_PASSWD" value="Something"/>
    <add key="CONFLUENCE_PAGEID" value="12345678"/>
    <add key="TEXT" value="This is the table generated from a daily process."/>
  </appSettings>
    <system.serviceModel>
        <bindings />
        <client />
    </system.serviceModel>
    <applicationSettings>
        <MyCompany.MyDepartment.ConfluenceWikiUpdate.Properties.Settings>
            <setting name="MyCompany_MyDepartment_ConfluenceWikiUpdate_com_atlassian_developer_ConfluenceSoapServiceService"
                serializeAs="String">
                <value>https://wiki.office.mycompany.com/plugins/servlet/soap-axis1/confluenceservice-v2</value>
            </setting>
        </MyCompany.MyDepartment.ConfluenceWikiUpdate.Properties.Settings>
    </applicationSettings>
</configuration>