优化CTE层次结构


15

在下面更新

我有一个具有典型acct /父帐户体系结构的帐户表来表示帐户层次结构(SQL Server 2012)。我使用CTE创建了一个VIEW,以散列层次结构,总的来说,它可以按预期的方式很好地工作。我可以在任何级别查询层次结构,并轻松查看分支。

需要根据层次结构返回一个业务逻辑字段。每个帐户记录中的一个字段描述了企业的规模(我们将其称为CustomerCount)。我需要报告的逻辑需要从整个分支汇总CustomerCount。换句话说,给定一个帐户,我需要对该帐户以及层次结构中该帐户下方每个分支中的每个子级的客户计数值进行汇总。

我使用CTE中内置的层次结构字段成功计算了该字段,该字段类似于acct4.acct3.acct2.acct1。我遇到的问题只是使其运行速度很快。如果没有这一计算字段,查询将在约3秒内运行。当我在计算字段中添加时,它变成一个4分钟的查询。

这是我能够想到的最佳版本,它可以返回正确的结果。我正在寻找有关如何在不牺牲性能的前提下重新构建此“视图”的想法。

我知道这种方法运行缓慢的原因(需要在where子句中计算谓词),但是我想不出另一种方法来构造它并仍然得到相同的结果。

这是一些示例代码,用于构建表并完全按照其在我的环境中的工作方式执行CTE。

Use Tempdb
go
CREATE TABLE dbo.Account
(
   Acctid varchar(1) NOT NULL
    , Name varchar(30) NULL
    , ParentId varchar(1) NULL
    , CustomerCount int NULL
);

INSERT Account
SELECT 'A','Best Bet',NULL,21  UNION ALL
SELECT 'B','eStore','A',30 UNION ALL
SELECT 'C','Big Bens','B',75 UNION ALL
SELECT 'D','Mr. Jimbo','B',50 UNION ALL
SELECT 'E','Dr. John','C',100 UNION ALL
SELECT 'F','Brick','A',222 UNION ALL
SELECT 'G','Mortar','C',153 ;


With AccountHierarchy AS

(                                                                           --Root values have no parent
    SELECT
        Root.AcctId                                         AccountId
        , Root.Name                                         AccountName
        , Root.ParentId                                     ParentId
        , 1                                                 HierarchyLevel  
        , cast(Root.Acctid as varchar(4000))                IdHierarchy     --highest parent reads right to left as in id3.Acctid2.Acctid1
        , cast(replace(Root.Name,'.','') as varchar(4000))  NameHierarchy   --highest parent reads right to left as in name3.name2.name1 (replace '.' so name parse is easy in last step)
        , cast(Root.Acctid as varchar(4000))                HierarchySort   --reverse of above, read left to right name1.name2.name3 for sorting on reporting only
        , cast(Root.Name as varchar(4000))                  HierarchyLabel  --use for labels on reporting only, indents names under sorted hierarchy
        , Root.CustomerCount                                CustomerCount   

    FROM 
        tempdb.dbo.account Root

    WHERE
        Root.ParentID is null

    UNION ALL

    SELECT
        Recurse.Acctid                                      AccountId
        , Recurse.Name                                      AccountName
        , Recurse.ParentId                                  ParentId
        , Root.HierarchyLevel + 1                           HierarchyLevel  --next level in hierarchy
        , cast(cast(recurse.Acctid as varchar(40)) + '.' + Root.IdHierarchy as varchar(4000))   IdHierarchy --cast because in real system this is a uniqueidentifier type needs converting
        , cast(replace(recurse.Name,'.','') + '.' + Root.NameHierarchy as varchar(4000)) NameHierarchy  --replace '.' for parsing in last step, cast to make room for lots of sub levels down the hierarchy
        , cast(Root.AccountName + '.' + Recurse.Name as varchar(4000)) HierarchySort    
        , cast(space(root.HierarchyLevel * 4) + Recurse.Name as varchar(4000)) HierarchyLabel
        , Recurse.CustomerCount                             CustomerCount

    FROM
        tempdb.dbo.account Recurse INNER JOIN
        AccountHierarchy Root on Root.AccountId = Recurse.ParentId
)


SELECT
    hier.AccountId
    , Hier.AccountName
    , hier.ParentId
    , hier.HierarchyLevel
    , hier.IdHierarchy
    , hier.NameHierarchy
    , hier.HierarchyLabel
    , parsename(hier.IdHierarchy,1) Acct1Id
    , parsename(hier.NameHierarchy,1) Acct1Name     --This is why we stripped out '.' during recursion
    , parsename(hier.IdHierarchy,2) Acct2Id
    , parsename(hier.NameHierarchy,2) Acct2Name
    , parsename(hier.IdHierarchy,3) Acct3Id
    , parsename(hier.NameHierarchy,3) Acct3Name
    , parsename(hier.IdHierarchy,4) Acct4Id
    , parsename(hier.NameHierarchy,4) Acct4Name
    , hier.CustomerCount

    /* fantastic up to this point. Next block of code is what causes problem. 
        Logic of code is "sum of CustomerCount for this location and all branches below in this branch of hierarchy"
        In live environment, goes from taking 3 seconds to 4 minutes by adding this one calc */

    , (
        SELECT  
            sum(children.CustomerCount)
        FROM
            AccountHierarchy Children
        WHERE
            hier.IdHierarchy = right(children.IdHierarchy, (1 /*length of id field*/ * hier.HierarchyLevel) + hier.HierarchyLevel - 1 /*for periods inbetween ids*/)
            --"where this location's idhierarchy is within child idhierarchy"
            --previously tried a charindex(hier.IdHierarchy,children.IdHierarchy)>0, but that performed even worse
        ) TotalCustomerCount
FROM
    AccountHierarchy hier

ORDER BY
    hier.HierarchySort


drop table tempdb.dbo.Account

2013年11月20日更新

某些建议的解决方案使我的工作顺其自然,我尝试了一种接近的新方法,但引入了新的/不同的障碍。老实说,我不知道是否需要单独发表,但这与解决此问题有关。

我决定让sum(customercount)变得困难的原因是在从顶部开始并向下构建的层次结构的上下文中识别子级。因此,我首先创建了一个由下而上构建的层次结构,使用“不是其他任何帐户的父级帐户”定义的根目录,并进行递归联接(root.parentacctid = recurse.acctid)

这样,我可以在递归发生时将子级客户计数添加到父级。由于我需要报告和级别,因此,除了自上而下,我还要执行自下而上的CTE,然后通过帐户ID将它们加入。事实证明,这种方法比最初的外部查询客户数量要快得多,但遇到了一些障碍。

首先,我无意间捕获了多个孩子的父级帐户的重复客户计数。根据孩子的数量,我算是某种计费方式的客户计数的两倍或三倍。我的解决方案是创建另一个计算acct的节点的cte,并在递归过程中对acct.customercount进行划分,因此当我累加整个分支时,不会重复计算acct。

因此,在这一点上,此新版本的结果不正确,但我知道为什么。自下而上的cte正在创建重复项。递归通过后,它将在根(子级子级)中查找帐户表中子级的子项。在第三次递归中,它选择与第二次递归相同的帐户,然后再次放入。

关于如何进行自下而上的CTE的想法,还是这会引起其他想法的流行?

Use Tempdb
go


CREATE TABLE dbo.Account
(
    Acctid varchar(1) NOT NULL
    , Name varchar(30) NULL
    , ParentId varchar(1) NULL
    , CustomerCount int NULL
);

INSERT Account
SELECT 'A','Best Bet',NULL,1  UNION ALL
SELECT 'B','eStore','A',2 UNION ALL
SELECT 'C','Big Bens','B',3 UNION ALL
SELECT 'D','Mr. Jimbo','B',4 UNION ALL
SELECT 'E','Dr. John','C',5 UNION ALL
SELECT 'F','Brick','A',6 UNION ALL
SELECT 'G','Mortar','C',7 ;



With AccountHierarchy AS

(                                                                           --Root values have no parent
    SELECT
        Root.AcctId                                         AccountId
        , Root.Name                                         AccountName
        , Root.ParentId                                     ParentId
        , 1                                                 HierarchyLevel  
        , cast(Root.Acctid as varchar(4000))                IdHierarchy     --highest parent reads right to left as in id3.Acctid2.Acctid1
        , cast(replace(Root.Name,'.','') as varchar(4000))  NameHierarchy   --highest parent reads right to left as in name3.name2.name1 (replace '.' so name parse is easy in last step)
        , cast(Root.Acctid as varchar(4000))                HierarchySort   --reverse of above, read left to right name1.name2.name3 for sorting on reporting only
        , cast(Root.Acctid as varchar(4000))                HierarchyMatch 
        , cast(Root.Name as varchar(4000))                  HierarchyLabel  --use for labels on reporting only, indents names under sorted hierarchy
        , Root.CustomerCount                                CustomerCount   

    FROM 
        tempdb.dbo.account Root

    WHERE
        Root.ParentID is null

    UNION ALL

    SELECT
        Recurse.Acctid                                      AccountId
        , Recurse.Name                                      AccountName
        , Recurse.ParentId                                  ParentId
        , Root.HierarchyLevel + 1                           HierarchyLevel  --next level in hierarchy
        , cast(cast(recurse.Acctid as varchar(40)) + '.' + Root.IdHierarchy as varchar(4000))   IdHierarchy --cast because in real system this is a uniqueidentifier type needs converting
        , cast(replace(recurse.Name,'.','') + '.' + Root.NameHierarchy as varchar(4000)) NameHierarchy  --replace '.' for parsing in last step, cast to make room for lots of sub levels down the hierarchy
        , cast(Root.AccountName + '.' + Recurse.Name as varchar(4000)) HierarchySort    
        , CAST(CAST(Root.HierarchyMatch as varchar(40)) + '.' 
            + cast(recurse.Acctid as varchar(40))   as varchar(4000))   HierarchyMatch
        , cast(space(root.HierarchyLevel * 4) + Recurse.Name as varchar(4000)) HierarchyLabel
        , Recurse.CustomerCount                             CustomerCount

    FROM
        tempdb.dbo.account Recurse INNER JOIN
        AccountHierarchy Root on Root.AccountId = Recurse.ParentId
)

, Nodes as
(   --counts how many branches are below for any account that is parent to another
    select
        node.ParentId Acctid
        , cast(count(1) as float) Nodes
    from AccountHierarchy  node
    group by ParentId
)

, BottomUp as
(   --creates the hierarchy starting at accounts that are not parent to any other
    select
        Root.Acctid
        , root.ParentId
        , cast(isnull(root.customercount,0) as float) CustomerCount
    from
        tempdb.dbo.Account Root
    where
        not exists ( select 1 from tempdb.dbo.Account OtherAccts where root.Acctid = OtherAccts.ParentId)

    union all

    select
        Recurse.Acctid
        , Recurse.ParentId
        , root.CustomerCount + cast ((isnull(recurse.customercount,0) / nodes.nodes) as float) CustomerCount
        -- divide the recurse customercount by number of nodes to prevent duplicate customer count on accts that are parent to multiple children, see customercount cte next
    from
        tempdb.dbo.Account Recurse inner join 
        BottomUp Root on root.ParentId = recurse.acctid inner join
        Nodes on nodes.Acctid = recurse.Acctid
)

, CustomerCount as
(
    select
        sum(CustomerCount) TotalCustomerCount
        , hier.acctid
    from
        BottomUp hier
    group by 
        hier.Acctid
)


SELECT
    hier.AccountId
    , Hier.AccountName
    , hier.ParentId
    , hier.HierarchyLevel
    , hier.IdHierarchy
    , hier.NameHierarchy
    , hier.HierarchyLabel
    , hier.hierarchymatch
    , parsename(hier.IdHierarchy,1) Acct1Id
    , parsename(hier.NameHierarchy,1) Acct1Name     --This is why we stripped out '.' during recursion
    , parsename(hier.IdHierarchy,2) Acct2Id
    , parsename(hier.NameHierarchy,2) Acct2Name
    , parsename(hier.IdHierarchy,3) Acct3Id
    , parsename(hier.NameHierarchy,3) Acct3Name
    , parsename(hier.IdHierarchy,4) Acct4Id
    , parsename(hier.NameHierarchy,4) Acct4Name
    , hier.CustomerCount

    , customercount.TotalCustomerCount

FROM
    AccountHierarchy hier inner join
    CustomerCount on customercount.acctid = hier.accountid

ORDER BY
    hier.HierarchySort 



drop table tempdb.dbo.Account

1
您是否尝试过将AccountHierarchy CTE的结果放入临时表(在IdHierarchy上建立索引),然后通过从临时表查询进行计算?您可能会遇到CTE的实施方式;您可能会对CTE中的每一行执行一次整个CTE。
Jon Boulineau

1
基础表上的索引是什么?
Mike Walsh

1
实际表中有多少行?
Mike Walsh 2013年

2
@MaxVernon谢谢。还没有发布太多,但是绝对可以看到模糊问题的答案质量有所不同。
liver.larson

@JonBoulineau我曾考虑过尝试使用临时表进行某些操作,但是我专门尝试将其作为视图执行,从而排除了临时表。关于如何解决或测试您的最后主张的任何想法吗?
liver.larson

Answers:


6

编辑:这是第二次尝试

根据@Max Vernon的答案,这是一种在内联子查询中绕过使用CTE的方法,就像自我加入CTE一样,我认为这是效率低下的原因。它使用仅在2012版SQL Server中可用的分析功能。在SQL-Fiddle上测试

这部分内容可以从阅读中跳过,它是Max的答案中的粘贴内容:

;With AccountHierarchy AS
(                                                                           
    SELECT
        Root.AcctId                                         AccountId
        , Root.Name                                         AccountName
        , Root.ParentId                                     ParentId
        , 1                                                 HierarchyLevel  
        , cast(Root.Acctid as varchar(4000))                IdHierarchyMatch        
        , cast(Root.Acctid as varchar(4000))                IdHierarchy
        , cast(replace(Root.Name,'.','') as varchar(4000))  NameHierarchy   
        , cast(Root.Acctid as varchar(4000))                HierarchySort
        , cast(Root.Name as varchar(4000))                  HierarchyLabel          ,
        Root.CustomerCount                                  CustomerCount   

    FROM 
        account Root

    WHERE
        Root.ParentID is null

    UNION ALL

    SELECT
        Recurse.Acctid                                      AccountId
        , Recurse.Name                                      AccountName
        , Recurse.ParentId                                  ParentId
        , Root.HierarchyLevel + 1                           HierarchyLevel
        , CAST(CAST(Root.IdHierarchyMatch as varchar(40)) + '.' 
            + cast(recurse.Acctid as varchar(40))   as varchar(4000))   IdHierarchyMatch
        , cast(cast(recurse.Acctid as varchar(40)) + '.' 
            + Root.IdHierarchy  as varchar(4000))           IdHierarchy
        , cast(replace(recurse.Name,'.','') + '.' 
            + Root.NameHierarchy as varchar(4000))          NameHierarchy
        , cast(Root.AccountName + '.' 
            + Recurse.Name as varchar(4000))                HierarchySort   
        , cast(space(root.HierarchyLevel * 4) 
            + Recurse.Name as varchar(4000))                HierarchyLabel
        , Recurse.CustomerCount                             CustomerCount
    FROM
        account Recurse INNER JOIN
        AccountHierarchy Root on Root.AccountId = Recurse.ParentId
)

在这里,我们使用来对CTE的行进行排序,IdHierarchyMatch并计算行号和运行总计(从下一行到末尾)。

, cte1 AS 
(
SELECT
    h.AccountId
    , h.AccountName
    , h.ParentId
    , h.HierarchyLevel
    , h.IdHierarchy
    , h.NameHierarchy
    , h.HierarchyLabel
    , parsename(h.IdHierarchy,1) Acct1Id
    , parsename(h.NameHierarchy,1) Acct1Name
    , parsename(h.IdHierarchy,2) Acct2Id
    , parsename(h.NameHierarchy,2) Acct2Name
    , parsename(h.IdHierarchy,3) Acct3Id
    , parsename(h.NameHierarchy,3) Acct3Name
    , parsename(h.IdHierarchy,4) Acct4Id
    , parsename(h.NameHierarchy,4) Acct4Name
    , h.CustomerCount
    , h.HierarchySort
    , h.IdHierarchyMatch
        , Rn = ROW_NUMBER() OVER 
                  (ORDER BY h.IdHierarchyMatch)
        , RunningCustomerCount = COALESCE(
            SUM(h.CustomerCount)
            OVER
              (ORDER BY h.IdHierarchyMatch
               ROWS BETWEEN 1 FOLLOWING
                        AND UNBOUNDED FOLLOWING)
          , 0) 
FROM
    AccountHierarchy AS h  
)

然后,我们又有了一个中间CTE,在其中我们使用了之前的运行总计和行号-基本上是找到树结构分支的终点:

, cte2 AS
(
SELECT
    cte1.*
    , rn3  = LAST_VALUE(Rn) OVER 
               (PARTITION BY Acct1Id, Acct2Id, Acct3Id 
                ORDER BY Acct4Id
                ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)       
    , rn2  = LAST_VALUE(Rn) OVER 
               (PARTITION BY Acct1Id, Acct2Id 
                ORDER BY Acct3Id, Acct4Id
                ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) 
    , rn1  = LAST_VALUE(Rn) OVER 
               (PARTITION BY Acct1Id 
                ORDER BY Acct2Id, Acct3Id, Acct4Id
                ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) 
    , rcc3 = LAST_VALUE(RunningCustomerCount) OVER 
               (PARTITION BY Acct1Id, Acct2Id, Acct3Id 
                ORDER BY Acct4Id
                ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)       
    , rcc2 = LAST_VALUE(RunningCustomerCount) OVER 
               (PARTITION BY Acct1Id, Acct2Id 
                ORDER BY Acct3Id, Acct4Id
                ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) 
    , rcc1 = LAST_VALUE(RunningCustomerCount) OVER 
               (PARTITION BY Acct1Id 
                ORDER BY Acct2Id, Acct3Id, Acct4Id
                ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) 
FROM
    cte1 
) 

最后,我们构建最后一部分:

SELECT
    hier.AccountId
    , hier.AccountName
    ---                        -- columns skipped 
    , hier.CustomerCount

    , TotalCustomerCount = hier.CustomerCount
        + hier.RunningCustomerCount 
        - ca.LastRunningCustomerCount

    , hier.HierarchySort
    , hier.IdHierarchyMatch
FROM
    cte2 hier
  OUTER APPLY
    ( SELECT  LastRunningCustomerCount, Rn
      FROM
      ( SELECT LastRunningCustomerCount
              = RunningCustomerCount, Rn
        FROM (SELECT NULL a) x  WHERE 4 <= HierarchyLevel 
      UNION ALL
        SELECT rcc3, Rn3
        FROM (SELECT NULL a) x  WHERE 3 <= HierarchyLevel 
      UNION ALL
        SELECT rcc2, Rn2 
        FROM (SELECT NULL a) x  WHERE 2 <= HierarchyLevel 
      UNION ALL
        SELECT rcc1, Rn1
        FROM (SELECT NULL a) x  WHERE 1 <= HierarchyLevel 
      ) x
      ORDER BY Rn 
      OFFSET 0 ROWS
      FETCH NEXT 1 ROWS ONLY
      ) ca
ORDER BY
    hier.HierarchySort ; 

并进行简化,使用与cte1上面的代码相同。在SQL-Fiddle-2上进行测试。请注意,这两种解决方案均在您的树中最多包含四个级别的前提下工作:

SELECT
    hier.AccountId
    ---                      -- skipping rows
    , hier.CustomerCount

    , TotalCustomerCount = CustomerCount
        + RunningCustomerCount 
        - CASE HierarchyLevel
            WHEN 4 THEN RunningCustomerCount
            WHEN 3 THEN LAST_VALUE(RunningCustomerCount) OVER 
                   (PARTITION BY Acct1Id, Acct2Id, Acct3Id 
                    ORDER BY Acct4Id
                    ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)       
            WHEN 2 THEN LAST_VALUE(RunningCustomerCount) OVER 
                   (PARTITION BY Acct1Id, Acct2Id 
                    ORDER BY Acct3Id, Acct4Id
                    ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) 
            WHEN 1 THEN LAST_VALUE(RunningCustomerCount) OVER 
                   (PARTITION BY Acct1Id 
                    ORDER BY Acct2Id, Acct3Id, Acct4Id
                    ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) 
          END

    , hier.HierarchySort
    , hier.IdHierarchyMatch
FROM cte1 AS hier
ORDER BY
    hier.HierarchySort ; 

第三种方法,仅对递归部分使用一个CTE,然后仅对窗口聚合函数(SUM() OVER (...))进行,因此它应适用于2005年以后的任何版本。在SQL-Fiddle-3上进行测试此解决方案假定像以前一样,在层次结构树中最多有4个级别:

;WITH AccountHierarchy AS
(                                                                           
    SELECT
          AccountId      = Root.AcctId                                         
        , AccountName    = Root.Name                                         
        , ParentId       = Root.ParentId                                     
        , HierarchyLevel = 1                                                   
        , HierarchySort  = CAST(Root.Acctid AS VARCHAR(4000))                
        , HierarchyLabel = CAST(Root.Name AS VARCHAR(4000))                   
        , Acct1Id        = CAST(Root.Acctid AS VARCHAR(4000))                
        , Acct2Id        = CAST(NULL AS VARCHAR(4000))                       
        , Acct3Id        = CAST(NULL AS VARCHAR(4000))                       
        , Acct4Id        = CAST(NULL AS VARCHAR(4000))                       
        , Acct1Name      = CAST(Root.Name AS VARCHAR(4000))                  
        , Acct2Name      = CAST(NULL AS VARCHAR(4000))                       
        , Acct3Name      = CAST(NULL AS VARCHAR(4000))                       
        , Acct4Name      = CAST(NULL AS VARCHAR(4000))                       
        , CustomerCount  = Root.CustomerCount                                   

    FROM 
        account AS Root

    WHERE
        Root.ParentID IS NULL

    UNION ALL

    SELECT
          Recurse.Acctid 
        , Recurse.Name 
        , Recurse.ParentId 
        , Root.HierarchyLevel + 1 
        , CAST(Root.AccountName + '.' 
            + Recurse.Name AS VARCHAR(4000)) 
        , CAST(SPACE(Root.HierarchyLevel * 4) 
            + Recurse.Name AS VARCHAR(4000)) 
        , Root.Acct1Id 
        , CASE WHEN Root.HierarchyLevel = 1 
              THEN cast(Recurse.Acctid AS VARCHAR(4000)) 
              ELSE Root.Acct2Id 
          END 
        , CASE WHEN Root.HierarchyLevel = 2 
              THEN CAST(Recurse.Acctid AS VARCHAR(4000)) 
              ELSE Root.Acct3Id 
          END 
        , CASE WHEN Root.HierarchyLevel = 3 
              THEN CAST(Recurse.Acctid AS VARCHAR(4000)) 
              ELSE Root.Acct4Id 
          END 

        , cast(Root.AccountName as varchar(4000))          
        , CASE WHEN Root.HierarchyLevel = 1 
              THEN CAST(Recurse.Name AS VARCHAR(4000)) 
              ELSE Root.Acct2Name 
          END 
        , CASE WHEN Root.HierarchyLevel = 2 
              THEN CAST(Recurse.Name AS VARCHAR(4000)) 
              ELSE Root.Acct3Name 
          END 
        , CASE WHEN Root.HierarchyLevel = 3 
              THEN CAST(Recurse.Name AS VARCHAR(4000)) 
              ELSE Root.Acct4Name 
          END 
        , Recurse.CustomerCount 
    FROM 
        account AS Recurse INNER JOIN 
        AccountHierarchy AS Root ON Root.AccountId = Recurse.ParentId
)

SELECT
      h.AccountId
    , h.AccountName
    , h.ParentId
    , h.HierarchyLevel
    , IdHierarchy = 
          CAST(COALESCE(h.Acct4Id+'.','') 
               + COALESCE(h.Acct3Id+'.','') 
               + COALESCE(h.Acct2Id+'.','') 
               + h.Acct1Id AS VARCHAR(4000))
    , NameHierarchy = 
          CAST(COALESCE(h.Acct4Name+'.','') 
               + COALESCE(h.Acct3Name+'.','') 
               + COALESCE(h.Acct2Name+'.','') 
               + h.Acct1Name AS VARCHAR(4000))   
    , h.HierarchyLabel
    , h.Acct1Id
    , h.Acct1Name
    , h.Acct2Id
    , h.Acct2Name
    , h.Acct3Id
    , h.Acct3Name
    , h.Acct4Id
    , h.Acct4Name
    , h.CustomerCount
    , TotalCustomerCount =  
          CASE h.HierarchyLevel
            WHEN 4 THEN h.CustomerCount
            WHEN 3 THEN SUM(h.CustomerCount) OVER 
                   (PARTITION BY h.Acct1Id, h.Acct2Id, h.Acct3Id)       
            WHEN 2 THEN SUM(h.CustomerCount) OVER 
                   (PARTITION BY Acct1Id, h.Acct2Id) 
            WHEN 1 THEN SUM(h.CustomerCount) OVER 
                   (PARTITION BY h.Acct1Id) 
          END
    , h.HierarchySort
    , IdHierarchyMatch = 
          CAST(h.Acct1Id 
               + COALESCE('.'+h.Acct2Id,'') 
               + COALESCE('.'+h.Acct3Id,'') 
               + COALESCE('.'+h.Acct4Id,'') AS VARCHAR(4000))   
FROM
    AccountHierarchy AS h  
ORDER BY
    h.HierarchySort ; 

第四种方法,它计算层次结构的关闭表作为中间CTE。在SQL-Fiddle-4上进行测试。好处是,对于总和计算,级别数没有限制。

;WITH AccountHierarchy AS
( 
    -- skipping several line, identical to the 3rd approach above
)

, ClosureTable AS
( 
    SELECT
          AccountId      = Root.AcctId  
        , AncestorId     = Root.AcctId  
        , CustomerCount  = Root.CustomerCount 
    FROM 
        account AS Root

    UNION ALL

    SELECT
          Recurse.Acctid 
        , Root.AncestorId 
        , Recurse.CustomerCount
    FROM 
        account AS Recurse INNER JOIN 
        ClosureTable AS Root ON Root.AccountId = Recurse.ParentId
)

, ClosureGroup AS
(                                                                           
    SELECT
          AccountId           = AncestorId  
        , TotalCustomerCount  = SUM(CustomerCount)                             
    FROM 
        ClosureTable AS a
    GROUP BY
        AncestorId
)

SELECT
      h.AccountId
    , h.AccountName
    , h.ParentId
    , h.HierarchyLevel 
    , h.HierarchyLabel
    , h.CustomerCount
    , cg.TotalCustomerCount 

    , h.HierarchySort
FROM
    AccountHierarchy AS h  
  JOIN
    ClosureGroup AS cg
      ON cg.AccountId = h.AccountId
ORDER BY
    h.HierarchySort ;  

更正了代码(以及链接的小提琴。)答案中缺少WHEN选项。
ypercubeᵀᴹ

+1-我喜欢使用2012功能。我现在学到很多东西!
Max Vernon

好的,只是花了一些时间潜入水中,才意识到性能很棒,但是数字不匹配。将结果与我的原件对照。我可以看到您正在使用的运行总数,但是必须进行一些更改才能按预期进行工作,而我还没有找到正确的解决方案。您的方法为我提供了一些便利,但是到目前为止,这还不是可行的解决方案。
liver.larson

哦,我认为这是严重错误。请接受。
ypercubeᵀᴹ

我试图纠正它。我的小样本可以正常工作,但请根据您的数据测试正确性。关于效率,我只能说通过测试才能知道(除非您的名字是@Paul White)。
ypercubeᵀᴹ

5

我相信这应该使其更快:

;With AccountHierarchy AS
(                                                                           
    SELECT
        Root.AcctId                                         AccountId
        , Root.Name                                         AccountName
        , Root.ParentId                                     ParentId
        , 1                                                 HierarchyLevel  
        , cast(Root.Acctid as varchar(4000))                IdHierarchyMatch        
        , cast(Root.Acctid as varchar(4000))                IdHierarchy
        , cast(replace(Root.Name,'.','') as varchar(4000))  NameHierarchy   
        , cast(Root.Acctid as varchar(4000))                HierarchySort
        , cast(Root.Name as varchar(4000))                  HierarchyLabel          ,
        Root.CustomerCount                                  CustomerCount   

    FROM 
        tempdb.dbo.account Root

    WHERE
        Root.ParentID is null

    UNION ALL

    SELECT
        Recurse.Acctid                                      AccountId
        , Recurse.Name                                      AccountName
        , Recurse.ParentId                                  ParentId
        , Root.HierarchyLevel + 1                           HierarchyLevel
        , CAST(CAST(Root.IdHierarchyMatch as varchar(40)) + '.' 
            + cast(recurse.Acctid as varchar(40))   as varchar(4000))   IdHierarchyMatch
        , cast(cast(recurse.Acctid as varchar(40)) + '.' 
            + Root.IdHierarchy  as varchar(4000))           IdHierarchy
        , cast(replace(recurse.Name,'.','') + '.' 
            + Root.NameHierarchy as varchar(4000))          NameHierarchy
        , cast(Root.AccountName + '.' 
            + Recurse.Name as varchar(4000))                HierarchySort   
        , cast(space(root.HierarchyLevel * 4) 
            + Recurse.Name as varchar(4000))                HierarchyLabel
        , Recurse.CustomerCount                             CustomerCount
    FROM
        tempdb.dbo.account Recurse INNER JOIN
        AccountHierarchy Root on Root.AccountId = Recurse.ParentId
)


SELECT
    hier.AccountId
    , Hier.AccountName
    , hier.ParentId
    , hier.HierarchyLevel
    , hier.IdHierarchy
    , hier.NameHierarchy
    , hier.HierarchyLabel
    , parsename(hier.IdHierarchy,1) Acct1Id
    , parsename(hier.NameHierarchy,1) Acct1Name
    , parsename(hier.IdHierarchy,2) Acct2Id
    , parsename(hier.NameHierarchy,2) Acct2Name
    , parsename(hier.IdHierarchy,3) Acct3Id
    , parsename(hier.NameHierarchy,3) Acct3Name
    , parsename(hier.IdHierarchy,4) Acct4Id
    , parsename(hier.NameHierarchy,4) Acct4Name
    , hier.CustomerCount
    , (
        SELECT  
            sum(children.CustomerCount)
        FROM
            AccountHierarchy Children
        WHERE
            Children.IdHierarchyMatch LIKE hier.IdHierarchyMatch + '%'
        ) TotalCustomerCount
        , HierarchySort
        , IdHierarchyMatch
FROM
    AccountHierarchy hier
ORDER BY
    hier.HierarchySort

我在CTE中添加了一个名为的列,该列IdHierarchyMatchIdHierarchy启用TotalCustomerCount子查询的正向版本WHERE子句具有可保留性。

比较执行计划的估计子树成本,这种方法应该快大约5倍。


感谢您抽出宝贵的时间查看此内容。有趣的是,这实际上是我的第一个本能,并认为仅可使用动态SQL来向字段添加通配符,因此甚至没有尝试。我应该检查一下。因此,结果是从2:49显着改善(从3:53降低),但是没有我期望的那么多。我将无话可说,看看还有什么其他想法。再次感谢您抽出宝贵的时间对此进行评估。我很感激。
liver.larson

仅供参考,我刚发现我的实现中出现语法错误。我们到了2:04执行时间。我开始的地方还更好。仍在追求更快。
liver.larson

1
很高兴我以一点点帮助。昨晚我花了大约2个小时来设法解决这个问题。我有一种很深的感觉,可以使用某种方法解决ROW_NUMER() OVER (ORDER BY...)。我只是无法从中得到正确的数字。这是一个非常有趣的问题。锻炼大脑!
Max Vernon

我尝试将其放入架构绑定(实例化)视图中,目的是在IdHierarchyMatch字段上添加索引,但是您不能在包含CTE的架构绑定视图上添加聚簇索引。我不知道是否该限制在SQL Server 2014解决
最大弗农

2
@MaxVernon对于2012年版本:SQL-Fiddle
ypercube13 2013年

3

我也试了一下。它不是很漂亮,但是似乎表现更好。

USE Tempdb
go

SET STATISTICS IO ON;
SET STATISTICS TIME OFF;
SET NOCOUNT ON;

--------
-- assuming the original table looks something like this 
-- and you cannot control it's indexes 
-- (only widened the data types a bit for the extra sample rows)
--------
CREATE TABLE dbo.Account
    (
      Acctid VARCHAR(10) NOT NULL ,
      Name VARCHAR(100) NULL ,
      ParentId VARCHAR(10) NULL ,
      CustomerCount INT NULL
    );

--------
-- inserting the same records as in your sample
--------
INSERT  Account
        SELECT  'A' ,
                'Best Bet' ,
                NULL ,
                21
        UNION ALL
        SELECT  'B' ,
                'eStore' ,
                'A' ,
                30
        UNION ALL
        SELECT  'C' ,
                'Big Bens' ,
                'B' ,
                75
        UNION ALL
        SELECT  'D' ,
                'Mr. Jimbo' ,
                'B' ,
                50
        UNION ALL
        SELECT  'E' ,
                'Dr. John' ,
                'C' ,
                100
        UNION ALL
        SELECT  'F' ,
                'Brick' ,
                'A' ,
                222
        UNION ALL
        SELECT  'G' ,
                'Mortar' ,
                'C' ,
                153;

--------
-- now lets up the ante a bit and add some extra rows with random parents 
-- to these 7 items, it is hard to measure differences with so few rows
--------
DECLARE @numberOfRows INT = 25000
DECLARE @from INT = 1
DECLARE @to INT = 7
DECLARE @T1 TABLE ( n INT ); 

WITH    cte ( n )
          AS ( SELECT   ROW_NUMBER() OVER ( ORDER BY CURRENT_TIMESTAMP )
               FROM     sys.messages
             )
    INSERT  INTO @T1
            SELECT  n
            FROM    cte
            WHERE   n <= @numberOfRows;

INSERT  INTO dbo.Account
        ( acctId ,
          name ,
          parentId ,
          Customercount
        )
        SELECT  CHAR(64 + RandomNumber) + CAST(n AS VARCHAR(10)) AS Id ,
                CAST('item ' + CHAR(64 + RandomNumber) + CAST(n AS VARCHAR(10)) AS VARCHAR(100)) ,
                CHAR(64 + RandomNumber) AS parentId ,
                ABS(CHECKSUM(NEWID()) % 100) + 1 AS RandomCustCount
        FROM    ( SELECT    n ,
                            ABS(CHECKSUM(NEWID()) % @to) + @from AS RandomNumber
                  FROM      @T1
                ) A;

--------
-- Assuming you cannot control it's indexes, in my tests we're better off taking the IO hit of copying the data
-- to some structure that is better optimized for this query. Not quite what I initially expected,  but we seem 
-- to be better off that way.
--------
CREATE TABLE tempdb.dbo.T1
    (
      AccountId VARCHAR(10) NOT NULL
                            PRIMARY KEY NONCLUSTERED ,
      AccountName VARCHAR(100) NOT NULL ,
      ParentId VARCHAR(10) NULL ,
      HierarchyLevel INT NULL ,
      HPath VARCHAR(1000) NULL ,
      IdHierarchy VARCHAR(1000) NULL ,
      NameHierarchy VARCHAR(1000) NULL ,
      HierarchyLabel VARCHAR(1000) NULL ,
      HierarchySort VARCHAR(1000) NULL ,
      CustomerCount INT NOT NULL
    );

CREATE CLUSTERED INDEX IX_Q1
ON tempdb.dbo.T1  ([ParentId]);

-- for summing customer counts over parents
CREATE NONCLUSTERED INDEX IX_Q2 
ON tempdb.dbo.T1  (HPath) INCLUDE(CustomerCount);

INSERT  INTO tempdb.dbo.T1
        ( AccountId ,
          AccountName ,
          ParentId ,
          HierarchyLevel ,
          HPath ,
          IdHierarchy ,
          NameHierarchy ,
          HierarchyLabel ,
          HierarchySort ,
          CustomerCount 
        )
        SELECT  Acctid AS AccountId ,
                Name AS AccountName ,
                ParentId AS ParentId ,
                NULL AS HierarchyLevel ,
                NULL AS HPath ,
                NULL AS IdHierarchy ,
                NULL AS NameHierarchy ,
                NULL AS HierarchyLabel ,
                NULL AS HierarchySort ,
                CustomerCount AS CustomerCount
        FROM    tempdb.dbo.account;



--------
-- I cannot seem to force an efficient way to do the sum while selecting over the recursive cte, 
-- so I took it aside. I am sure there is a more elegant way but I can't seem to make it happen. 
-- At least it performs better this way. But it remains a very expensive query.
--------
;
WITH    AccountHierarchy
          AS ( SELECT   Root.AccountId AS AcId ,
                        Root.ParentId ,
                        1 AS HLvl ,
                        CAST(Root.AccountId AS VARCHAR(1000)) AS [HPa] ,
                        CAST(Root.accountId AS VARCHAR(1000)) AS hid ,
                        CAST(REPLACE(Root.AccountName, '.', '') AS VARCHAR(1000)) AS hn ,
                        CAST(Root.accountid AS VARCHAR(1000)) AS hs ,
                        CAST(Root.accountname AS VARCHAR(1000)) AS hl
               FROM     tempdb.dbo.T1 Root
               WHERE    Root.ParentID IS NULL
               UNION ALL
               SELECT   Recurse.AccountId AS acid ,
                        Recurse.ParentId ParentId ,
                        Root.Hlvl + 1 AS hlvl ,
                        CAST(Root.HPa + '.' + Recurse.AccountId AS VARCHAR(1000)) AS hpa ,
                        CAST(recurse.AccountId + '.' + Root.hid AS VARCHAR(1000)) AS hid ,
                        CAST(REPLACE(recurse.AccountName, '.', '') + '.' + Root.hn AS VARCHAR(1000)) AS hn ,
                        CAST(Root.hs + '.' + Recurse.AccountName AS VARCHAR(1000)) AS hs ,
                        CAST(SPACE(root.hlvl * 4) + Recurse.AccountName AS VARCHAR(1000)) AS hl
               FROM     tempdb.dbo.T1 Recurse
                        INNER JOIN AccountHierarchy Root ON Root.AcId = Recurse.ParentId
             )
    UPDATE  tempdb.dbo.T1
    SET     HierarchyLevel = HLvl ,
            HPath = Hpa ,
            IdHierarchy = hid ,
            NameHierarchy = hn ,
            HierarchyLabel = hl ,
            HierarchySort = hs
    FROM    AccountHierarchy
    WHERE   AccountId = AcId;

SELECT  --HPath ,
        AccountId ,
        AccountName ,
        ParentId ,
        HierarchyLevel ,
        IdHierarchy ,
        NameHierarchy ,
        HierarchyLabel ,
        PARSENAME(IdHierarchy, 1) Acct1Id ,
        PARSENAME(NameHierarchy, 1) Acct1Name ,
        PARSENAME(IdHierarchy, 2) Acct2Id ,
        PARSENAME(NameHierarchy, 2) Acct2Name ,
        PARSENAME(IdHierarchy, 3) Acct3Id ,
        PARSENAME(NameHierarchy, 3) Acct3Name ,
        PARSENAME(IdHierarchy, 4) Acct4Id ,
        PARSENAME(NameHierarchy, 4) Acct4Name ,
        CustomerCount ,
        Cnt.TotalCustomerCount
FROM    tempdb.dbo.t1 Hier
        CROSS APPLY ( SELECT    SUM(CustomerCount) AS TotalCustomerCount
                      FROM      tempdb.dbo.t1
                      WHERE     HPath LIKE hier.HPath + '%'
                    ) Cnt
ORDER BY HierarchySort;

DROP TABLE tempdb.dbo.t1;
DROP TABLE tempdb.dbo.Account;

英勇的尝试。这就是一些相当不错的示例数据生成。我需要变得更好一些。仍在寻找那种优雅的解决方案,我敢肯定那里还在等待。
liver.larson
By using our site, you acknowledge that you have read and understand our Cookie Policy and Privacy Policy.
Licensed under cc by-sa 3.0 with attribution required.