There are numerous times when it would be handy to have a DISTINCT function to select distinct values out of a datatable used earlier in code. This saves additional round trips to the SQL server. I have used this code in the passed :
private static bool ColumnEqual(object a, object b)
{
if (a == DBNull.Value && b == DBNull.Value) // both are DBNull.Value
return true;
if (a == DBNull.Value || b == DBNull.Value) // only one is DBNull.Value
return false;
return (a.Equals(b)); // value type standard comparison
}
public static DataTable SelectDistinct(DataTable sourceTable, string columnName)
{
// Create a new DataTable
DataTable resultdt = new DataTable();
// Get the Colunm to use
DataColumn dc = sourceTable.Columns[columnName];
if (dc != null)
{
// Add a col to the result table
resultdt.Columns.Add(columnName, dc.DataType);
// set an object to hold the previous value, and set it value to null fo the first element
object lastValueAdded = null;
// get the a sorted list of the results
DataRow[] sourceDataRows = sourceTable.Select(string.Empty, columnName);
foreach (DataRow dr in sourceDataRows)
{
// if it is the first element or a diffrent value add to the result table
if (lastValueAdded == null || !(ColumnEqual(lastValueAdded, dr[dc])))
{
// set the value for the LastValueAdded
lastValueAdded = dr[dc];
resultdt.Rows.Add(new [] { lastValueAdded });
}
}
}
else
{
throw new DataException(string.Format("Column Name {0} Does not exist in SourceTable",
columnName));
}
return resultdt;
}
This code is a well know derivative from one at Microsoft. http://support.microsoft.com/default.aspx?scid=kb;en-us;326176#1. The usage is simple as is outlined below:
DataTable Resultdt = DataHelper.SelectDistinct(Sourcedt, "TestCol");
More Examples can be found at
Examples using DataHelper.SelectDistinct
The problem with this code was that it did not support multiple distinct fields in a single table. This is a slight drawback for many distinct usages such as drop down lists which typically require value and id.
Rather than rewriting the function above to support multiple fields, I took a closer look at the framework and the distinct functionality is actually wrapped up in the DataView.ToTable Method which was released with .NET 2.0.
Summary usage:
DataView distinctTableView = new DataView(sourceDataTable);
DataTable distinctResult;
// Select Distinct on Col1 and Col2
distinctResult = distinctTableView.ToTable(true, new[] { "Col1","Col2" });
More comprehensive Examples using a Unit Test
[Test]
public void SelectDistinctValuesExample()
{
DataTable dt = new DataTable();
dt.Columns.Add("Col1", typeof (int));
dt.Columns.Add("Col2", typeof(int));
dt.Columns.Add("Col3", typeof(int));
dt.AcceptChanges();
dt.LoadDataRow(new object[] {1,1,1}, true);
dt.LoadDataRow(new object[] {1,1,2}, true);
dt.LoadDataRow(new object[] {1,1,3}, true);
dt.LoadDataRow(new object[] {1,2,1}, true);
dt.LoadDataRow(new object[] {1,2,2}, true);
dt.LoadDataRow(new object[] {1,2,3}, true);
dt.LoadDataRow(new object[] {1,3,1}, true);
dt.LoadDataRow(new object[] {1,3,2}, true);
dt.LoadDataRow(new object[] {1,3,3}, true);
dt.LoadDataRow(new object[] {2,1,1}, true);
dt.LoadDataRow(new object[] {2,1,2}, true);
dt.LoadDataRow(new object[] {2,1,3}, true);
dt.LoadDataRow(new object[] {2,2,1}, true);
dt.LoadDataRow(new object[] {2,2,2}, true);
dt.LoadDataRow(new object[] {2,2,3}, true);
dt.LoadDataRow(new object[] {2,3,1}, true);
dt.LoadDataRow(new object[] {2,3,2}, true);
dt.LoadDataRow(new object[] {2,3,3}, true);
DataView distinctTableView = new DataView(dt);
DataTable distinctResult;
// Example 1 Select Distinct on Col1
distinctResult = distinctTableView.ToTable(true, new[] { "Col1" });
// assert result is
// 1
// 2
Assert.AreEqual(2, distinctResult.Rows.Count);
Assert.AreEqual(1, distinctResult.Rows[0][0]);
Assert.AreEqual(2, distinctResult.Rows[1][0]);
// Example 2 Select Distinct on Col2
distinctResult = distinctTableView.ToTable(true, new[] { "Col2" });
// assert result is
// 1
// 2
// 3
Assert.AreEqual(3, distinctResult.Rows.Count);
Assert.AreEqual(1, distinctResult.Rows[0][0]);
Assert.AreEqual(2, distinctResult.Rows[1][0]);
Assert.AreEqual(3, distinctResult.Rows[2][0]);
distinctResult = distinctTableView.ToTable(true, new[] { "Col1" , "Col2" });
Assert.AreEqual(6, distinctResult.Rows.Count);
// assert result is
// 1,1
// 1,2
// 1,3
// 2,1
// 2,2
// 2.3
Assert.AreEqual(1, distinctResult.Rows[0][0]);
Assert.AreEqual(1, distinctResult.Rows[1][0]);
Assert.AreEqual(1, distinctResult.Rows[2][0]);
Assert.AreEqual(1, distinctResult.Rows[0][1]);
Assert.AreEqual(2, distinctResult.Rows[1][1]);
Assert.AreEqual(3, distinctResult.Rows[2][1]); // only do 6 cells assume rest are correct
}