This function is helpful if you are working with smaller DataTables less than a 1000 rows and would like to dynamically create lookup lists for the values within the tables rather than getting the lookup lists from an external source. The function will assume you are working with value types which can be support the Equals operator for comparison. All standard types supported by databases meet this condition.

Namespace:  CA.Common.Data
Assembly:  CA.Common (in CA.Common.dll) Version: 1.0.0.0 (1.0.0.0)

Remarks

The algorithm will work by sorting the data first then cycling through all the data comparing each element with the next element if a match is not found the value is added into result table if a match is found then the element already exists in the result and move on. This function will test every element in the row set once. This method has been superseded by DataView.ToTable() method for more details see using DataView.ToTable() to select distinct values

example using dataView

CopyC#
// doing the same thing using  NET 2.0's DataView
[Test]
public void SelectDistinct_BasicTestUsingDataView()
{

    // will create a datatable with 45 rows.. with 9 distinct values
    // ie 10 + 9 + 8 + 7 + 6 + ... + 2 + 1
    DataTable Sourcedt = CreateTestDataTable(9);
    // make sure there are 45 rows in the table.. unit test the unit test
    Assert.AreEqual(45, Sourcedt.Rows.Count);

    DataView distinctTable = new DataView(Sourcedt);
    distinctTable.ToTable(true, new[] { "TestCol" });
    // No go ask for the distinct values
    DataTable resultdt = distinctTable.ToTable(/*distinct*/true, /*ColNames As array*/ new[] { "TestCol" });

    // make sure there are 9 distinct values 
    Assert.AreEqual(9, resultdt.Rows.Count);
}

Examples

CopyC#
// Create a DataTable with a single Column which contains non unique test string data   
// if you pass 3 into the function you will get:<br>
// Test3
// Test3
// Test3
// Test2
// Test2
// Test1
// The order is inverse to ensure the data is not asending 
private DataTable CreateTestDataTable(int NumberOfDistinctValues)
{
    DataTable result = new DataTable();
    result.Columns.Add("TestCol", typeof(System.String));
    for (int i = NumberOfDistinctValues; i >= 1; i--)
    {
        for (int j = 0; j < i; j++)
        {
            result.Rows.Add(new object[] { string.Format("Test{0}", i.ToString())});
        }
    }
    result.AcceptChanges();
    return result;
}



// This is the basic primary test for the function..
// it tests that the distinct works as expected and that the results are returned in ascending order as expected
[Test]
public void SelectDistinct_BasicTest()
{
    // will create a datatable with 45 rows.. with 10 distinct values
    // ie 10 + 9 + 8 + 7 + 6 + ... + 2 + 1
    DataTable Sourcedt = CreateTestDataTable(9);
    // make sure there are 45 rows in the table.. unit test the unit test
    Assert.AreEqual(45, Sourcedt.Rows.Count);

    // No go ask for the distinct values
    DataTable Resultdt = DataHelper.SelectDistinct(Sourcedt, "TestCol");
    // make sure there are 9 distinct values 
    Assert.AreEqual(9, Resultdt.Rows.Count);
    // Now make sure those values have been returned in ascending order
    for(int i = 1; i <= 9; i++)
    {
        Assert.AreEqual(string.Format("Test{0}", i), Resultdt.Rows[i - 1][0].ToString());
    }
}

// This is the basic primary test for the function..
// it tests that the distinct works as expected and that the results are returned in ascending order as expected
[Test]
public void SelectDistinct_BasicTestWithNullValues()
{
    // will create a datatable with 45 rows.. with 10 distinct values
    // ie 10 + 9 + 8 + 7 + 6 + ... + 2 + 1
    DataTable Sourcedt = CreateTestDataTable(9);
    Sourcedt.Rows.Add(new object[] {DBNull.Value});
    Sourcedt.Rows.Add(new object[] {null});

    // make sure there are 45 rows in the table.. unit test the unit test
    Assert.AreEqual(47, Sourcedt.Rows.Count);

    // No go ask for the distinct values
    DataTable Resultdt = DataHelper.SelectDistinct(Sourcedt, "TestCol");
    // make sure there are 9 distinct values 
    Assert.AreEqual(10, Resultdt.Rows.Count);
    // Now make sure those values have been returned in ascending order
    for (int i = 2; i <= 10; i++)
    {
        Assert.AreEqual(string.Format("Test{0}", i-1), Resultdt.Rows[i - 1][0].ToString());
    }
}


// tests the fucntion with a large dataset.. ie over a million rows.. 
// 
[Test]
public void SelectDistinct_BasicTestWithLargeAmountOfData()
{
    // will create a datatable with over a million rows ie 1125750 rows.. with 1500 distinct values
    // ie 1500 + 1499 +  ... + 2 + 1
    DataTable Sourcedt = CreateTestDataTable(1500);
    // make sure there are 1125750 rows in the table.. unit test the unit test
    Assert.AreEqual(1125750, Sourcedt.Rows.Count);
    DateTime StartTime = DateTime.Now;
    DataTable Resultdt = DataHelper.SelectDistinct(Sourcedt, "TestCol");
    TimeSpan ts = StartTime.Subtract(DateTime.Now);
    Assert.LessOrEqual(ts.TotalSeconds, 5);
    Assert.AreEqual(1500, Resultdt.Rows.Count);
}

[Test]
public void SelectDistinct_BasicTestWithEmptyDataTable()
{
    DataTable result = new DataTable();
    result.Columns.Add("TestCol", typeof(System.String));
    result.AcceptChanges();
    DataTable Resultdt = DataHelper.SelectDistinct(result, "TestCol");
    Assert.AreEqual(0, Resultdt.Rows.Count);
}

[Test]
public void SelectDistinct_BasicTestWithBadColName()
{
    DataTable result = new DataTable();
    result.Columns.Add("TestCol", typeof(System.String));
    result.AcceptChanges();
    try
    {
        DataTable Resultdt = DataHelper.SelectDistinct(result, "TestCol2");
    }
    catch (DataException ex)
    {
        // expected exception.. 
        Assert.AreEqual("Column Name TestCol2 Does not exist in SourceTable", ex.Message);
    }
}

See Also