在下面更新
我有一个具有典型acct /父帐户体系结构的帐户表来表示帐户层次结构(SQL Server 2012)。我使用CTE创建了一个VIEW,以散列层次结构,总的来说,它可以按预期的方式很好地工作。我可以在任何级别查询层次结构,并轻松查看分支。
需要根据层次结构返回一个业务逻辑字段。每个帐户记录中的一个字段描述了企业的规模(我们将其称为CustomerCount)。我需要报告的逻辑需要从整个分支汇总CustomerCount。换句话说,给定一个帐户,我需要对该帐户以及层次结构中该帐户下方每个分支中的每个子级的客户计数值进行汇总。
我使用CTE中内置的层次结构字段成功计算了该字段,该字段类似于acct4.acct3.acct2.acct1。我遇到的问题只是使其运行速度很快。如果没有这一计算字段,查询将在约3秒内运行。当我在计算字段中添加时,它变成一个4分钟的查询。
这是我能够想到的最佳版本,它可以返回正确的结果。我正在寻找有关如何在不牺牲性能的前提下重新构建此“视图”的想法。
我知道这种方法运行缓慢的原因(需要在where子句中计算谓词),但是我想不出另一种方法来构造它并仍然得到相同的结果。
这是一些示例代码,用于构建表并完全按照其在我的环境中的工作方式执行CTE。
Use Tempdb
go
CREATE TABLE dbo.Account
(
Acctid varchar(1) NOT NULL
, Name varchar(30) NULL
, ParentId varchar(1) NULL
, CustomerCount int NULL
);
INSERT Account
SELECT 'A','Best Bet',NULL,21 UNION ALL
SELECT 'B','eStore','A',30 UNION ALL
SELECT 'C','Big Bens','B',75 UNION ALL
SELECT 'D','Mr. Jimbo','B',50 UNION ALL
SELECT 'E','Dr. John','C',100 UNION ALL
SELECT 'F','Brick','A',222 UNION ALL
SELECT 'G','Mortar','C',153 ;
With AccountHierarchy AS
( --Root values have no parent
SELECT
Root.AcctId AccountId
, Root.Name AccountName
, Root.ParentId ParentId
, 1 HierarchyLevel
, cast(Root.Acctid as varchar(4000)) IdHierarchy --highest parent reads right to left as in id3.Acctid2.Acctid1
, cast(replace(Root.Name,'.','') as varchar(4000)) NameHierarchy --highest parent reads right to left as in name3.name2.name1 (replace '.' so name parse is easy in last step)
, cast(Root.Acctid as varchar(4000)) HierarchySort --reverse of above, read left to right name1.name2.name3 for sorting on reporting only
, cast(Root.Name as varchar(4000)) HierarchyLabel --use for labels on reporting only, indents names under sorted hierarchy
, Root.CustomerCount CustomerCount
FROM
tempdb.dbo.account Root
WHERE
Root.ParentID is null
UNION ALL
SELECT
Recurse.Acctid AccountId
, Recurse.Name AccountName
, Recurse.ParentId ParentId
, Root.HierarchyLevel + 1 HierarchyLevel --next level in hierarchy
, cast(cast(recurse.Acctid as varchar(40)) + '.' + Root.IdHierarchy as varchar(4000)) IdHierarchy --cast because in real system this is a uniqueidentifier type needs converting
, cast(replace(recurse.Name,'.','') + '.' + Root.NameHierarchy as varchar(4000)) NameHierarchy --replace '.' for parsing in last step, cast to make room for lots of sub levels down the hierarchy
, cast(Root.AccountName + '.' + Recurse.Name as varchar(4000)) HierarchySort
, cast(space(root.HierarchyLevel * 4) + Recurse.Name as varchar(4000)) HierarchyLabel
, Recurse.CustomerCount CustomerCount
FROM
tempdb.dbo.account Recurse INNER JOIN
AccountHierarchy Root on Root.AccountId = Recurse.ParentId
)
SELECT
hier.AccountId
, Hier.AccountName
, hier.ParentId
, hier.HierarchyLevel
, hier.IdHierarchy
, hier.NameHierarchy
, hier.HierarchyLabel
, parsename(hier.IdHierarchy,1) Acct1Id
, parsename(hier.NameHierarchy,1) Acct1Name --This is why we stripped out '.' during recursion
, parsename(hier.IdHierarchy,2) Acct2Id
, parsename(hier.NameHierarchy,2) Acct2Name
, parsename(hier.IdHierarchy,3) Acct3Id
, parsename(hier.NameHierarchy,3) Acct3Name
, parsename(hier.IdHierarchy,4) Acct4Id
, parsename(hier.NameHierarchy,4) Acct4Name
, hier.CustomerCount
/* fantastic up to this point. Next block of code is what causes problem.
Logic of code is "sum of CustomerCount for this location and all branches below in this branch of hierarchy"
In live environment, goes from taking 3 seconds to 4 minutes by adding this one calc */
, (
SELECT
sum(children.CustomerCount)
FROM
AccountHierarchy Children
WHERE
hier.IdHierarchy = right(children.IdHierarchy, (1 /*length of id field*/ * hier.HierarchyLevel) + hier.HierarchyLevel - 1 /*for periods inbetween ids*/)
--"where this location's idhierarchy is within child idhierarchy"
--previously tried a charindex(hier.IdHierarchy,children.IdHierarchy)>0, but that performed even worse
) TotalCustomerCount
FROM
AccountHierarchy hier
ORDER BY
hier.HierarchySort
drop table tempdb.dbo.Account
2013年11月20日更新
某些建议的解决方案使我的工作顺其自然,我尝试了一种接近的新方法,但引入了新的/不同的障碍。老实说,我不知道是否需要单独发表,但这与解决此问题有关。
我决定让sum(customercount)变得困难的原因是在从顶部开始并向下构建的层次结构的上下文中识别子级。因此,我首先创建了一个由下而上构建的层次结构,使用“不是其他任何帐户的父级帐户”定义的根目录,并进行递归联接(root.parentacctid = recurse.acctid)
这样,我可以在递归发生时将子级客户计数添加到父级。由于我需要报告和级别,因此,除了自上而下,我还要执行自下而上的CTE,然后通过帐户ID将它们加入。事实证明,这种方法比最初的外部查询客户数量要快得多,但遇到了一些障碍。
首先,我无意间捕获了多个孩子的父级帐户的重复客户计数。根据孩子的数量,我算是某种计费方式的客户计数的两倍或三倍。我的解决方案是创建另一个计算acct的节点的cte,并在递归过程中对acct.customercount进行划分,因此当我累加整个分支时,不会重复计算acct。
因此,在这一点上,此新版本的结果不正确,但我知道为什么。自下而上的cte正在创建重复项。递归通过后,它将在根(子级子级)中查找帐户表中子级的子项。在第三次递归中,它选择与第二次递归相同的帐户,然后再次放入。
关于如何进行自下而上的CTE的想法,还是这会引起其他想法的流行?
Use Tempdb
go
CREATE TABLE dbo.Account
(
Acctid varchar(1) NOT NULL
, Name varchar(30) NULL
, ParentId varchar(1) NULL
, CustomerCount int NULL
);
INSERT Account
SELECT 'A','Best Bet',NULL,1 UNION ALL
SELECT 'B','eStore','A',2 UNION ALL
SELECT 'C','Big Bens','B',3 UNION ALL
SELECT 'D','Mr. Jimbo','B',4 UNION ALL
SELECT 'E','Dr. John','C',5 UNION ALL
SELECT 'F','Brick','A',6 UNION ALL
SELECT 'G','Mortar','C',7 ;
With AccountHierarchy AS
( --Root values have no parent
SELECT
Root.AcctId AccountId
, Root.Name AccountName
, Root.ParentId ParentId
, 1 HierarchyLevel
, cast(Root.Acctid as varchar(4000)) IdHierarchy --highest parent reads right to left as in id3.Acctid2.Acctid1
, cast(replace(Root.Name,'.','') as varchar(4000)) NameHierarchy --highest parent reads right to left as in name3.name2.name1 (replace '.' so name parse is easy in last step)
, cast(Root.Acctid as varchar(4000)) HierarchySort --reverse of above, read left to right name1.name2.name3 for sorting on reporting only
, cast(Root.Acctid as varchar(4000)) HierarchyMatch
, cast(Root.Name as varchar(4000)) HierarchyLabel --use for labels on reporting only, indents names under sorted hierarchy
, Root.CustomerCount CustomerCount
FROM
tempdb.dbo.account Root
WHERE
Root.ParentID is null
UNION ALL
SELECT
Recurse.Acctid AccountId
, Recurse.Name AccountName
, Recurse.ParentId ParentId
, Root.HierarchyLevel + 1 HierarchyLevel --next level in hierarchy
, cast(cast(recurse.Acctid as varchar(40)) + '.' + Root.IdHierarchy as varchar(4000)) IdHierarchy --cast because in real system this is a uniqueidentifier type needs converting
, cast(replace(recurse.Name,'.','') + '.' + Root.NameHierarchy as varchar(4000)) NameHierarchy --replace '.' for parsing in last step, cast to make room for lots of sub levels down the hierarchy
, cast(Root.AccountName + '.' + Recurse.Name as varchar(4000)) HierarchySort
, CAST(CAST(Root.HierarchyMatch as varchar(40)) + '.'
+ cast(recurse.Acctid as varchar(40)) as varchar(4000)) HierarchyMatch
, cast(space(root.HierarchyLevel * 4) + Recurse.Name as varchar(4000)) HierarchyLabel
, Recurse.CustomerCount CustomerCount
FROM
tempdb.dbo.account Recurse INNER JOIN
AccountHierarchy Root on Root.AccountId = Recurse.ParentId
)
, Nodes as
( --counts how many branches are below for any account that is parent to another
select
node.ParentId Acctid
, cast(count(1) as float) Nodes
from AccountHierarchy node
group by ParentId
)
, BottomUp as
( --creates the hierarchy starting at accounts that are not parent to any other
select
Root.Acctid
, root.ParentId
, cast(isnull(root.customercount,0) as float) CustomerCount
from
tempdb.dbo.Account Root
where
not exists ( select 1 from tempdb.dbo.Account OtherAccts where root.Acctid = OtherAccts.ParentId)
union all
select
Recurse.Acctid
, Recurse.ParentId
, root.CustomerCount + cast ((isnull(recurse.customercount,0) / nodes.nodes) as float) CustomerCount
-- divide the recurse customercount by number of nodes to prevent duplicate customer count on accts that are parent to multiple children, see customercount cte next
from
tempdb.dbo.Account Recurse inner join
BottomUp Root on root.ParentId = recurse.acctid inner join
Nodes on nodes.Acctid = recurse.Acctid
)
, CustomerCount as
(
select
sum(CustomerCount) TotalCustomerCount
, hier.acctid
from
BottomUp hier
group by
hier.Acctid
)
SELECT
hier.AccountId
, Hier.AccountName
, hier.ParentId
, hier.HierarchyLevel
, hier.IdHierarchy
, hier.NameHierarchy
, hier.HierarchyLabel
, hier.hierarchymatch
, parsename(hier.IdHierarchy,1) Acct1Id
, parsename(hier.NameHierarchy,1) Acct1Name --This is why we stripped out '.' during recursion
, parsename(hier.IdHierarchy,2) Acct2Id
, parsename(hier.NameHierarchy,2) Acct2Name
, parsename(hier.IdHierarchy,3) Acct3Id
, parsename(hier.NameHierarchy,3) Acct3Name
, parsename(hier.IdHierarchy,4) Acct4Id
, parsename(hier.NameHierarchy,4) Acct4Name
, hier.CustomerCount
, customercount.TotalCustomerCount
FROM
AccountHierarchy hier inner join
CustomerCount on customercount.acctid = hier.accountid
ORDER BY
hier.HierarchySort
drop table tempdb.dbo.Account