Bootstrap

C#实现SQL Server数据血缘关系生成程序

要在现有的C#程序中添加功能,输出SQL Server数据血缘关系的三张表到Excel文件,我们需要进行以下几个步骤:

分析存储过程、视图和函数中的引用关系,构建数据血缘关系。
按依赖性从小到大排序表的顺序。
找出对应生成表的数据的存储过程。
将结果输出到Excel文件。

以下是完整的代码实现:

using Microsoft.SqlServer.TransactSql.ScriptDom;
using OfficeOpenXml;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

class Program
{
    static void Main()
    {
        string directoryPath = @"<搜索的目录路径>";
        var dataRelations = new Dictionary<string, List<string>>();
        var tableProcedures = new Dictionary<string, List<string>>();
        var allTablesAndViews = new HashSet<string>();

        ProcessSqlFiles(directoryPath, dataRelations, tableProcedures, allTablesAndViews);

        var sortedTables = SortTablesByDependency(dataRelations, allTablesAndViews);

        WriteToExcel(dataRelations, sortedTables, tableProcedures);
    }

    static void ProcessSqlFiles(string directoryPath, Dictionary<string, List<string>> dataRelations, Dictionary<string, List<string>> tableProcedures, HashSet<string> allTablesAndViews)
    {
        foreach (string filePath in Directory.EnumerateFiles(directoryPath, "*.sql", SearchOption.AllDirectories))
        {
            Console.WriteLine($"Processing file: {filePath}");
            string sqlContent = File.ReadAllText(filePath);
            ProcessSqlContent(sqlContent, dataRelations, tableProcedures, allTablesAndViews);
            CheckCreateStatements(sqlContent, tableProcedures);
        }
    }

    static void ProcessSqlContent(string sqlContent, Dictionary<string, List<string>> dataRelations, Dictionary<string, List<string>> tableProcedures, HashSet<string> allTablesAndViews)
    {
        TSql150Parser parser = new TSql150Parser(false);
        IList<ParseError> errors;
        TSqlFragment fragment = parser.Parse(new StringReader(sqlContent), out errors);

        if (errors.Count == 0)
        {
            var referenceVisitor = new ReferenceVisitor(dataRelations, allTablesAndViews);
            fragment.Accept(referenceVisitor);
        }
        else
        {
            foreach (var error in errors)
            {
                Console.WriteLine($"Parse error: {error.Message}");
            }
        }
    }

    static void CheckCreateStatements(string sqlContent, Dictionary<string, List<string>> tableProcedures)
    {
        // 匹配创建视图的语句
        MatchCollection viewMatches = Regex.Matches(sqlContent, @"CREATE\s+VIEW\s+([^\s(]+)", RegexOptions.IgnoreCase);
        foreach (Match match in viewMatches)
        {
            Console.WriteLine($"View: {match.Groups[1].Value}");
        }

        // 匹配创建存储过程的语句
        MatchCollection sprocMatches = Regex.Matches(sqlContent, @"CREATE\s+PROC(?:EDURE)?\s+([^\s(]+)", RegexOptions.IgnoreCase);
        foreach (Match match in sprocMatches)
        {
            Console.WriteLine($"Stored Procedure: {match.Groups[1].Value}");
            tableProcedures[match.Groups[1].Value] = new List<string>();
        }

        // 匹配创建函数的语句
        MatchCollection functionMatches = Regex.Matches(sqlContent, @"CREATE\s+(?:FUNCTION|AGGREGATE)\s+([^\s(]+)", RegexOptions.IgnoreCase);
        foreach (Match match in functionMatches)
        {
            Console.WriteLine($"User Defined Function: {match.Groups[1].Value}");
        }
    }

    static List<string> SortTablesByDependency(Dictionary<string, List<string>> dataRelations, HashSet<string> allTablesAndViews)
    {
        var sorted = new List<string>();
        var visited = new HashSet<string>();

        foreach (var table in allTablesAndViews)
        {
            TopologicalSort(table, dataRelations, sorted, visited);
        }

        return sorted;
    }

    static void TopologicalSort(string node, Dictionary<string, List<string>> dataRelations, List<string> sorted, HashSet<string> visited)
    {
        if (visited.Contains(node)) return;

        visited.Add(node);

        if (dataRelations.ContainsKey(node))
        {
            foreach (var child in dataRelations[node])
            {
                TopologicalSort(child, dataRelations, sorted, visited);
            }
        }

        sorted.Insert(0, node);
    }

    static void WriteToExcel(Dictionary<string, List<string>> dataRelations, List<string> sortedTables, Dictionary<string, List<string>> tableProcedures)
    {
        using (var package = new ExcelPackage())
        {
            var worksheet1 = package.Workbook.Worksheets.Add("Data Relations");
            worksheet1.Cells[1, 1].Value = "Table/View Name";
            worksheet1.Cells[1, 2].Value = "Generation Path";

            int row = 2;
            foreach (var table in sortedTables)
            {
                worksheet1.Cells[row, 1].Value = table;
                worksheet1.Cells[row, 2].Value = GetGenerationPath(table, dataRelations);
                row++;
            }

            var worksheet2 = package.Workbook.Worksheets.Add("Sorted Tables");
            worksheet2.Cells[1, 1].Value = "Table/View Name";
            for (int i = 0; i < sortedTables.Count; i++)
            {
                worksheet2.Cells[i + 2, 1].Value = sortedTables[i];
            }

            var worksheet3 = package.Workbook.Worksheets.Add("Table - Procedure Mapping");
            worksheet3.Cells[1, 1].Value = "Table/View Name";
            worksheet3.Cells[1, 2].Value = "Stored Procedures";

            row = 2;
            foreach (var table in sortedTables)
            {
                worksheet3.Cells[row, 1].Value = table;
                worksheet3.Cells[row, 2].Value = string.Join(", ", tableProcedures.GetValueOrDefault(table, new List<string>()));
                row++;
            }

            FileInfo file = new FileInfo("DataBloodline.xlsx");
            package.SaveAs(file);
        }
    }

    static string GetGenerationPath(string table, Dictionary<string, List<string>> dataRelations)
    {
        if (!dataRelations.ContainsKey(table)) return table;
        return table + "->" + string.Join("->", dataRelations[table].Select(t => GetGenerationPath(t, dataRelations)));
    }
}

class ReferenceVisitor : TSqlFragmentVisitor
{
    private readonly Dictionary<string, List<string>> dataRelations;
    private readonly HashSet<string> allTablesAndViews;

    public ReferenceVisitor(Dictionary<string, List<string>> dataRelations, HashSet<string> allTablesAndViews)
    {
        this.dataRelations = dataRelations;
        this.allTablesAndViews = allTablesAndViews;
    }

    public override void Visit(SelectQuerySpecification node)
    {
        VisitTableReferences(node.FromClause);
    }

    public override void Visit(InsertStatement node)
    {
        AddReference(node.TableReference);
    }

    public override void Visit(UpdateStatement node)
    {
        AddReference(node.TableReference);
    }

    public override void Visit(DeleteStatement node)
    {
        AddReference(node.TableReference);
    }

    public override void Visit(ViewDefinition node)
    {
        AddReference(node.SchemaObjectName);
    }

    private void VisitTableReferences(FromClause fromClause)
    {
        if (fromClause!= null)
        {
            foreach (var tableReference in fromClause.TableReferences)
            {
                AddReference(tableReference);
            }
        }
    }

    private void AddReference(TSqlFragment fragment)
    {
        if (fragment is TableReference tableReference)
        {
            var name = tableReference.SchemaObject.BaseIdentifier.Value;
            AddInfo(name);
        }
        else if (fragment is SchemaObjectName schemaObjectName)
        {
            var name = schemaObjectName.BaseIdentifier.Value;
            AddInfo(name);
        }
    }

    private void AddInfo(string name)
    {
        allTablesAndViews.Add(name);
        if (!dataRelations.ContainsKey(name))
        {
            dataRelations[name] = new List<string>();
        }
    }
}

代码说明:

Main方法:初始化变量并调用 ProcessSqlFiles 方法,最后调用 WriteToExcel 方法将结果输出到Excel文件。
ProcessSqlFiles方法:遍历指定目录及其子目录下的所有 .sql 文件,并对每个文件的内容执行 ProcessSqlContent 和 CheckCreateStatements 方法。
ProcessSqlContent方法:使用 Microsoft.SqlServer.TransactSql.ScriptDom 库解析SQL内容,获取引用关系。
CheckCreateStatements方法:使用正则表达式匹配SQL内容中的创建视图、存储过程和函数的语句,并更新 tableProcedures 字典。
SortTablesByDependency方法:使用拓扑排序按依赖性从小到大排序表的顺序。
WriteToExcel方法:使用EPPlus库将数据血缘关系的三张表输出到Excel文件。
ReferenceVisitor类:继承自 TSqlFragmentVisitor ,用于收集SQL语句中的表和视图引用关系。

请确保你已经安装了EPPlus库,可以通过NuGet安装:

Install-Package EPPlus
;