Just Code‎ > ‎

Tsql - Procedure to compare data in two tables to find missing/additional/modified rows

posted Dec 8, 2011, 2:01 AM by Peter Henell   [ updated Jan 25, 2012, 5:17 AM ]
I was playing around with unit tests in SQL Server and decided i needed a way of comparing two datasets from within the database. The output should indicate what rows are Missing, Additional and Modified.

The way you would use this is to fill two temporary tables with data and send their objectIDs to this stored procedure. 
The first table should contain the data that is expected from your Unit test.
The second table should contain the actual data that you got (from your function, view or stored procedure or what it might be that you are testing)

The result will be a set of rows that are failing. Any completely matching rows will not be reported back.

This first version assumes that the first column in both tables is their primary key.
IF SCHEMA_ID('TestFramework') IS NULL
    EXEC ('create schema TestFramework;')
 
IF EXISTS (SELECT 1 FROM sys.objects WHERE type = 'p' AND name = 'CompareResults' AND schema_id = SCHEMA_ID('TestFramework'))
    DROP PROCEDURE TestFramework.CompareResults
GO
CREATE PROCEDURE TestFramework.CompareResults(@expected_ObjectId INT, @result_ObjectId INT)
AS
    SET NOCOUNT ON
    -- generic method to compare to sets that share the same schema
    DECLARE @sql VARCHAR(MAX)
    DECLARE @columnlist VARCHAR(MAX) = ''
    DECLARE @columns VARCHAR(MAX) = ''
    
    SELECT 
        @columnlist = @columnlist + CASE WHEN cols.column_id = 1 THEN 'PrimaryKeyColumn' ELSE cols.name END + ' ' + 
            CASE TYPE_NAME(cols.system_type_id)
                WHEN 'varchar' THEN TYPE_NAME(cols.system_type_id) + '(' + CAST(cols.max_length AS VARCHAR(100)) + ')'
                WHEN 'nvarchar' THEN TYPE_NAME(cols.system_type_id) + '(' + CAST(cols.max_length AS VARCHAR(100)) + ')'
                WHEN 'char' THEN TYPE_NAME(cols.system_type_id) + '(' + CAST(cols.max_length AS VARCHAR(100)) + ')'
                WHEN 'nchar' THEN TYPE_NAME(cols.system_type_id) + '(' + CAST(cols.max_length AS VARCHAR(100)) + ')'
                WHEN 'decimal' THEN TYPE_NAME(cols.system_type_id) + '(' + CAST(cols.precision AS VARCHAR(100)) + ', ' + CAST(cols.scale AS VARCHAR(100)) +')'
                ELSE TYPE_NAME(cols.system_type_id)
            END
            + ', '
    FROM    
        tempdb.sys.objects 
    CROSS APPLY
        (
            SELECT     
                c.name ,
                c.column_id,
                c.max_length ,
                c.precision ,
                c.system_type_id,
                c.scale ,
                c.is_nullable
            FROM 
                tempdb.sys.columns c 
            WHERE 
                object_id = @expected_ObjectId
        ) cols
    WHERE 
        object_id = @expected_ObjectId
    
    SELECT @columnlist = SUBSTRING(@columnlist, 0, LEN(@columnlist))
    
    -- Get the names of the temporary tables    
    DECLARE @resultTablename VARCHAR(MAX)
    SELECT @resultTableName = SCHEMA_NAME(schema_id) + '.' + name FROM tempdb.sys.objects WHERE OBJECT_ID = @result_ObjectId
    DECLARE @expecedTablename VARCHAR(MAX)
    SELECT @expecedTablename=  SCHEMA_NAME(schema_id) + '.' + name FROM tempdb.sys.objects WHERE OBJECT_ID = @expected_ObjectId
        
    -- Drop our working tables
    IF EXISTS(SELECT 1 FROM sys.objects WHERE name = 'resultTable' AND schema_id = SCHEMA_ID('TestFramework'))
        DROP TABLE TestFramework.resultTable
    IF EXISTS(SELECT 1 FROM sys.objects WHERE name = 'expectedTable' AND schema_id = SCHEMA_ID('TestFramework'))
        DROP TABLE TestFramework.expectedTable
    
    -- Create the working tables that will be used for comparisons
    SELECT @sql = 'create table TestFramework.resultTable( ' + @columnlist + ', TestResult varchar(100) NULL)'
    EXEC (@sql)
    SELECT @sql = 'create table TestFramework.expectedTable( ' + @columnlist + ', TestResult varchar(100) NULL)'
    EXEC (@sql)
    
    -- Move the data from the temporary tables to the working tables
    INSERT TestFramework.resultTable
    EXEC('select *, null from ' + @resultTableName)
    INSERT TestFramework.expectedTable
    EXEC('select *,null from ' + @expecedTablename)
    
    -- Find all differences between the expected and the result
    ;WITH Tchanges AS
    (
        SELECT * FROM TestFramework.expectedTable
        EXCEPT
        SELECT * FROM TestFramework.resultTable
    )
        
    MERGE TestFramework.resultTable  AS [TARGET]
    USING TChanges AS [SOURCE]

    ON [TARGET].PrimaryKeyColumn = [SOURCE].PrimaryKeyColumn

    -- If there is a match, then the PrimaryKeyColumn was found but some of the other columns had been modified.
    -- Mark the row as modified
    WHEN MATCHED THEN
        UPDATE SET
            TestResult = 'Columns not equal'
    -- If the row was not found then it means that it was found in expected but not in result
    -- Mark the row as added, 
    WHEN NOT MATCHED BY TARGET THEN
        INSERT (
            PrimaryKeyColumn ,
            TestResult
        )
        VALUES(
            SOURCE.PrimaryKeyColumn ,
            'Row Missing in result'
        )
    --output $action, inserted.PrimaryKeyColumn
    ;
    
    -- If the row was not found in expected, but in result
    -- mark the row as to many rows in result.
    update TestFramework.resultTable
    set TestResult = 'Row to many in result'
    where not exists( select 1 from TestFramework.expectedTable e where e.PrimaryKeyColumn = TestFramework.resultTable.PrimaryKeyColumn)
    
    delete TestFramework.resultTable
    where TestResult Is null
    
    
    
    SELECT * FROM TestFramework.resultTable
    
    DROP TABLE TestFramework.expectedTable
    DROP TABLE TestFramework.resultTable
    

go
Comments