Below are two ways to extract data from Green Plum Warehouse tables.For tables that are small the code is straightforward.However for tables with millions of rows the steps are a little different.
I have listed the code for both here.
I have used the Npgsql.dll to create the connection to progresql Green Plum Warehouse.Add it as a reference in your C# project.Documentation can be found here http://npgsql.projects.pgfoundry.org/
1) For tables with small number of rows.
using Npgsql;
using System;
using System.Configuration;
namespace GPExtract
{
class Program
{
static void Main(string[] args)
{
try
{
ExtractData();
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
Console.WriteLine(ex.StackTrace);
}
Console.WriteLine("Done !");
Console.Read();
}
private static void ExtractData()
{
using (NpgsqlConnection oConn = new NpgsqlConnection(ConfigurationManager.AppSettings["GP_CONNECTION_STRING"]))
{
NpgsqlCommand oCmd = new NpgsqlCommand(@"select distinct(id),min(create_ts),max(create_ts) from mytable group by id order by id", oConn);
oCmd.CommandTimeout = int.Parse(ConfigurationManager.AppSettings["COMMAND_TIMEOUT"]);
NpgsqlDataReader oReader = oCmd.ExecuteReader();
using (StreamWriter oWriter = new StreamWriter(ConfigurationManager.AppSettings["OUTPUT_FOLDER_PATH"] + "myoutputfile.txt"))
{
while (oReader.Read())
{
oWriter.WriteLine(string.Format("{0}\t{1}\t{2}", dr.GetValue(0) != null ? dr.GetValue(0).ToString() : "", dr.GetValue(1) != null ? GetFormattedDateTime(dr.GetValue(1).ToString()) : "", dr.GetValue(2) != null ? GetFormattedDateTime(dr.GetValue(2).ToString()) : ""));
}
}
}
}
}
}
2) In case of millions of rows the extraction of all rows is done by creating file on Green Plum accessible share, a temporary external table and loading the external table with the data from the Green Plum warehouse table.
using Npgsql;
using System;
using System.Configuration;
namespace GPExtract
{
class Program
{
static void Main(string[] args)
{
try
{
string sGpFilePath = ConfigurationManager.AppSettings["GP_FILE_PATH"];
string sOutputFilePath = ConfigurationManager.AppSettings["OUTPUT_FILE_PATH"];
ExtractData(sGpFilePath,sOutputFilePath);
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
Console.WriteLine(ex.StackTrace);
}
Console.WriteLine("Done !");
Console.Read();
}
private static void ExtractData(string sGpFilePath,string sOutputFilePath)
{
if(!File.Exists(sGPFilePath)
{
//Create a file for GP to load from external table
File.Create(sGpFilePath);
string sSql = @"CREATE WRITABLE EXTERNAL TEMP TABLE temp_mytable ( id character varying(22),
min_create_dt integer,
max_create_dt integer)LOCATION (
'gpfdist://server-1:8082/mylogin/DATA/myfile.txt',
'gpfdist://server-1:8083/mylogin/DATA/myfile.txt',
'gpfdist://server-1:8084/mylogin/DATA/myfile.txt',
'gpfdist://server-1:8085/mylogin/DATA/myfile.txt')
FORMAT 'text' (delimiter E'\t' null '' )
ENCODING 'LATIN9'
DISTRIBUTED randomly;
insert into temp_mytable
select distinct(id),min(create_dt),max(create_dt) from mytable group by id order by id;";
using (NpgsqlConnection oConn = new NpgsqlConnection(ConfigurationManager.AppSettings["GP_CONNECTION_STRING"]))
{
oConn.Open();
NpgsqlCommand oCmd = new NpgsqlCommand(sSql, oConn);
oCmd.ExecuteNonQuery();
}
//Move the file from GP share to server
File.Move(sGpFilePath, sOutputFilePath);
}
}
}
Note: The second program is parallel extract across ports 8082-8085 and is faster.
No comments:
Post a Comment