如何在另一个字符串中查找字符串的所有位置


11

如何用 PATINDEX 找出表列或变量中某个模式出现的所有位置?

-- Sample sentence used throughout the question; the word "ali" occurs three times.
DECLARE @name NVARCHAR(MAX);

SET @name = 'ali reza dar yek shabe barani ba yek  '
          + 'dokhtare khoshkel be disco raft va ali baraye'
          + ' 1 saat anja bud va sepas... ali...';

-- PATINDEX reports only the first match, so this returns a single row.
SELECT PATINDEX('%ali%', @name) AS pos;

这只返回 1,但我想要得到所有结果,例如:

pos
===
  1
 74
113

Answers:


9
-- Collect the start position of every 'ali' in @name by re-running PATINDEX
-- on the text that follows the previous hit.
DECLARE @name NVARCHAR(MAX);
SET @name = 'ali reza dar yek shabe barani ba yek  dokhtare khoshkel be disco raft va ali baraye 1 saat anja bud va sepas... ali...';

DECLARE @a TABLE (pos INT);
DECLARE @hit INT;
DECLARE @previous_hit INT;

SET @previous_hit = 0;
SET @hit = PATINDEX('%ali%', @name);

-- When nothing more is found PATINDEX yields 0, so @hit stays equal to
-- @previous_hit and the loop ends.
WHILE @hit > 0 AND @previous_hit <> @hit
BEGIN
    INSERT INTO @a (pos) VALUES (@hit);

    SET @previous_hit = @hit;
    SET @hit = @hit + PATINDEX('%ali%', SUBSTRING(@name, @hit + 1, LEN(@name)));
END;

SELECT pos FROM @a;

要使其可重用,可以在表函数中使用它来调用它,如下所示:

-- List every position at which the pattern matches in the sample sentence.
SELECT ID
FROM dbo.F_CountPats(
    'ali reza dar yek shabe barani ba yek  dokhtare khoshkel be disco raft va ali baraye 1 saat anja bud va sepas... ali...',
    '%ali%'
);

函数看起来像这样

-- Returns one row per position (1-based) at which @Pat matches in @txt.
--
--   @txt : text to search.
--   @Pat : PATINDEX pattern, including its % wildcards (for example '%ali%').
--
-- Result: table (ID int) with one row per match position. The table is empty
-- when nothing matches or when either argument is NULL (PATINDEX then returns
-- NULL and the loop is never entered).
--
-- The search restarts one character after each hit, so overlapping matches
-- are reported.
Create FUNCTION [dbo].[F_CountPats] 
(
@txt varchar(max),
@Pat varchar(max)
)
RETURNS 
@tab TABLE 
(
 ID int
)
AS
BEGIN
    -- Upper bound for the length of any remainder of @txt. DATALENGTH counts
    -- trailing spaces whereas LEN ignores them; bounding the remainder with
    -- LEN cut it short and hid matches located in trailing whitespace.
    DECLARE @max_len bigint
    SET @max_len = DATALENGTH(@txt)

    DECLARE @offset int    -- position in @txt of the most recent match
    DECLARE @rel_pos int   -- match position relative to the current remainder
    SET @offset = 0
    SET @rel_pos = PATINDEX(@Pat, @txt)

    WHILE @rel_pos > 0
    BEGIN
        SET @offset = @offset + @rel_pos
        INSERT INTO @tab (ID) VALUES (@offset)

        -- Continue in the text that follows the match just recorded.
        SET @rel_pos = PATINDEX(@Pat, SUBSTRING(@txt, @offset + 1, @max_len))
    END

    RETURN 
END

GO

我知道这是个老问题,但是我对性能有疑问。我构建了两个函数,用来在仅包含 0 和 1 的字符串中查找 1。我分别使用了您的解决方案和 @aaron-bertrand 的解决方案,但是得到了相同的结果和相同的性能。哪个解决方案会更好?
Misiu 2015年

2
@Misiu 正如预期的那样,Aaron Bertrand 的解决方案不仅优雅,而且比我的解决方案快得多,应该成为被采纳的答案。您可以用更大的输入轻松验证这一点:使用他的示例,只需在调用 SELECT pos FROM dbo.FindPatternLocation(@name, 'ali'); 之前添加 SET @name = REPLICATE(@name, 5000);,然后再用我那个较慢的过程试一试。
bummi

15

我认为这将比您选择的循环方法(此处提供一些证据)稍微更有效,并且绝对比递归CTE更有效:

-- Returns the 1-based start position of every occurrence of @term in @string,
-- including overlapping occurrences.
--
--   @string : text to search.
--   @term   : literal text to look for (not a LIKE/PATINDEX pattern).
--
-- Result: one row per occurrence, column pos. No rows are returned when either
-- argument is NULL or when LEN(@term) is 0 (LEN ignores trailing spaces, so a
-- term made only of spaces counts as zero-length here).
--
-- Candidate positions come from numbering the rows of sys.all_objects, so
-- positions beyond that view's row count are never examined.
--
-- Each candidate is compared directly against @string. The earlier form
-- compared against @term + @string with a lower bound of "Number > 1", which
-- let a self-overlapping term (for example 'aa') match inside the prepended
-- copy and report positions of 0 or less.
CREATE FUNCTION dbo.FindPatternLocation
(
    @string NVARCHAR(MAX),
    @term   NVARCHAR(255)
)
RETURNS TABLE
AS
    RETURN 
    (
      SELECT pos = n.Number
      FROM (
          SELECT ROW_NUMBER() OVER (ORDER BY [object_id])
          FROM sys.all_objects
      ) AS n(Number)
      WHERE LEN(@term) > 0
        -- last start position at which the whole term still fits
        AND n.Number <= LEN(@string) - LEN(@term) + 1
        AND SUBSTRING(@string, n.Number, LEN(@term)) = @term
    );

用法示例:

-- Build the sample sentence, then list every position of 'ali' in it.
DECLARE @name NVARCHAR(MAX);

SET @name =
      N'ali reza dar yek shabe barani ba yek'
    + '  dokhtare khoshkel be disco raft va ali baraye '
    + '1 saat anja bud va sepas... ali...';

SELECT pos
FROM dbo.FindPatternLocation(@name, 'ali');

结果:

pos
---
  1
 74
113

如果您的字符串长于2K,请使用sys.all_columns而不是sys.all_objects。如果长度超过8K,则添加交叉连接。


2

-递归CTE

-- Recursive CTE: the anchor row holds the first PATINDEX result for 'ali';
-- each recursive step searches the text after the previous hit and stops as
-- soon as PATINDEX finds nothing more.
WITH sample AS (
    SELECT 'ali reza dar yek shabe barani ba yek  dokhtare khoshkel be disco raft va ali baraye 1 saat anja bud va sepas... ali...' AS name
),
hits AS (
    SELECT
        PATINDEX('%ali%', s.name) AS pos,
        s.name
    FROM sample AS s

    UNION ALL

    SELECT
        h.pos + PATINDEX('%ali%', SUBSTRING(h.name, h.pos + 1, LEN(h.name))) AS pos,
        h.name
    FROM hits AS h
    WHERE PATINDEX('%ali%', SUBSTRING(h.name, h.pos + 1, LEN(h.name))) > 0
)
SELECT pos
FROM hits

0

我喜欢亚伦·伯特兰(Aaron Bertrand)的回答。尽管我不太了解,但它看起来确实很优雅。

过去,我在使用 sys.objects 时遇到过权限问题。再加上我需要对代码进行故障排查,于是我对 Aaron 的代码做了改动,并把结果附在下面。

这是我的程序:

-- Returns the start position of every non-overlapping occurrence of
-- @TextToFind in @TextToSearch.
--
--   @TextToSearch : text to search.
--   @TextToFind   : literal text to look for.
--
-- Result set: (POSID, POS). Matches are numbered POSID = 1, 2, ... in the
-- order they are found, with POS holding the 1-based start position. When
-- nothing is found (including NULL or empty arguments) a single row
-- (POSID = 0, POS = 0) is returned, so the caller always receives a row.
--
-- Fixes relative to the earlier version:
--   * the loop bound stopped one position too early, so a match at the very
--     end of the text that directly followed another match was skipped
--     (for example 'a' in 'aXaa' missed position 4);
--   * the step was LEN(@TextToFind), which is 0 for a search string made of
--     spaces, so the search never advanced and the loop never ended.
CREATE PROCEDURE dbo.FindPatternLocations
-- Params
@TextToSearch nvarchar (max),
@TextToFind nvarchar (255)

AS
BEGIN

    -- Length of the search string including trailing spaces; LEN alone would
    -- ignore them, so measure with a sentinel character appended.
    declare @LengthSearchString int
    set @LengthSearchString = LEN(@TextToFind + N'x') - 1

    declare @Index int

    create table #Positions (
    [POSID] [int] IDENTITY(0,1) NOT FOR REPLICATION NOT NULL,
    POS int
    )

    -- Placeholder row (POSID 0) so that a row is returned even without matches.
    insert into #Positions (POS) values (0)

    set @Index = charindex(@TextToFind, @TextToSearch, 1)

    -- CHARINDEX yields 0 (or NULL for NULL input) once there is nothing left
    -- to find, which ends the loop.
    while @Index > 0
    begin
        insert into #Positions (POS) values (@Index)

        -- Resume directly after the match just recorded (non-overlapping).
        set @Index = charindex(@TextToFind, @TextToSearch, @Index + @LengthSearchString)
    end

    -- The placeholder is not needed once at least one match was recorded.
    if exists (select 1 from #Positions where POSID > 0)
        delete from #Positions where POSID = 0

    select POSID, POS from #Positions order by POSID
END
GO

MAX(posid)值也是找到的匹配项数。


要学究一点,这看起来不像是我的代码的变体。完全没有 :-)这正是我所提倡的那种蛮力循环(并已证明是较慢的)。
亚伦·伯特兰

0

这是一个基于Aaron答案的简单代码:

  • 不限于sys.all_objects的大小
  • 不要错过最后一个“ X”

代码:

DECLARE @termToFind CHAR(1) = 'X';
DECLARE @string VARCHAR(40) = 'XX XXX  X   XX';

-- Pad the text with one character that differs from the search term; the
-- upper bound of the filter below is LEN(@string), so the padding lets the
-- last character of the original text be examined as well.
SET @string += '.';

DECLARE @stringLength BIGINT = LEN(@string);

-- One number per character of @string, generated by cross joining
-- master..spt_values with itself instead of relying on sys.all_objects.
WITH numbers (Number) AS (
    SELECT TOP (@stringLength)
        ROW_NUMBER() OVER (ORDER BY t1.number)
    FROM master..spt_values AS t1
    CROSS JOIN master..spt_values AS t2
)
SELECT pos = numbers.Number - LEN(@termToFind)
FROM numbers
WHERE numbers.Number > 1
  AND numbers.Number <= CONVERT(INT, LEN(@string))
  -- Number indexes into @termToFind + @string, hence the subtraction above.
  AND SUBSTRING(@termToFind + @string, numbers.Number, LEN(@termToFind)) = @termToFind;

结果

pos
--------------------
1
2
4
5
6
9
13
14

(8 row(s) affected)

我认为我已经通过改用 sys.all_columns 解决了大小限制的问题(您可以使用任何数据源,只要它的行数覆盖最长字符串的长度即可),而且我重新测试过,没有发现最后一个“X”在哪里被漏掉。
阿龙贝特朗

0

抱歉我来晚了,但我想让那些希望扩展此功能的人更容易上手。我研究了这里的每一种实现,选取了在我看来最好的一种(Aaron Bertrand 的),对其进行了简化,得到了下面这个可以直接使用的“模板”。请明智地使用它。

-- Returns the 1-based start position of every occurrence of
-- @search_expression in @expression_to_be_searched (overlapping occurrences
-- included).
--
--   @search_expression        : literal text to look for.
--   @expression_to_be_searched: text to search.
--
-- Result: one row per occurrence, column n. No rows are returned when either
-- argument is NULL or when CHARINDEX reports no match.
--
-- Candidate positions come from numbering the rows of sys.all_objects, so
-- positions beyond that view's row count are never examined.
CREATE FUNCTION dbo.CHARINDICES (
    @search_expression NVARCHAR(4000),
    @expression_to_be_searched NVARCHAR(MAX)
) RETURNS TABLE AS RETURN (
    WITH tally AS (
        SELECT Number = ROW_NUMBER() OVER (ORDER BY [object_id])
        FROM sys.all_objects)
    SELECT n = tally.Number
    FROM 
        tally 
    WHERE 
        -- (1) visit every character position once; the sentinel character makes
        --     LEN count trailing spaces, which it would otherwise ignore
        tally.Number BETWEEN 1 AND LEN(@expression_to_be_searched + N'x') - 1
        -- (2) keep the position only when a match starts exactly there; this
        --     yields each match once, so no DISTINCT is required
        AND CHARINDEX(@search_expression, @expression_to_be_searched, tally.Number) = tally.Number
)

-- Built-in CHARINDEX: first occurrence only.
SELECT CHARINDEX('C', 'BACBABCBABBCBACBBABC');

-- dbo.CHARINDICES: every occurrence.
SELECT n
FROM dbo.CHARINDICES('C', 'BACBABCBABBCBACBBABC');

仅供参考:您可以由此派生出其他行为,例如扩展为基于 PATINDEX() 的版本:

-- Returns every 1-based position at which the PATINDEX pattern
-- @search_expression matches in @expression_to_be_searched.
--
--   @search_expression        : PATINDEX pattern, including its % wildcards.
--   @expression_to_be_searched: text to search.
--
-- Result: one row per distinct match position, column n. No rows are returned
-- when either argument is NULL or when nothing matches.
--
-- For each start position num the pattern is applied to the suffix of the text
-- beginning at num; the first hit in that suffix is converted back to a
-- position in the full text. The suffix is taken with SUBSTRING from num
-- rather than with RIGHT(..., LEN(...) - num + 1): LEN ignores trailing
-- spaces, so for text ending in spaces RIGHT returned a suffix that did not
-- start at num and the reported positions were wrong or missing.
--
-- Candidate positions come from numbering the rows of sys.all_objects, so
-- positions beyond that view's row count are never examined.
CREATE FUNCTION dbo.PATINDICES (
    @search_expression NVARCHAR(4000) = '%[cS]%',
    @expression_to_be_searched NVARCHAR(MAX) = 'W3Schools.com'
) RETURNS TABLE AS RETURN (
    WITH tally AS (
        SELECT num = ROW_NUMBER() OVER (ORDER BY [object_id])
        FROM sys.all_objects)
    SELECT DISTINCT n = z.subIdx + tally.num - 1
    FROM 
        tally 
        -- DATALENGTH (bytes) is never smaller than the character count, so it
        -- serves as "everything from num to the end"
        CROSS APPLY (SELECT subExp = SUBSTRING(@expression_to_be_searched, tally.num, DATALENGTH(@expression_to_be_searched))) y
        CROSS APPLY (SELECT subIdx = PATINDEX(@search_expression, y.subExp)) z
    WHERE 
        -- the sentinel character makes LEN count trailing spaces too
        tally.num BETWEEN 1 AND LEN(@expression_to_be_searched + N'x') - 1
        AND z.subIdx != 0
)

-- Built-in PATINDEX: first match only.
SELECT PATINDEX('%[cS]%', 'W3Schools.com');

-- dbo.PATINDICES: every match position.
SELECT n
FROM dbo.PATINDICES('%[cS]%', 'W3Schools.com');

0
-- Collect every position of @search inside @name by calling CHARINDEX
-- repeatedly, each time starting one character after the previous hit.
DECLARE @search VARCHAR(5);
SET @search = 'a';

DECLARE @name VARCHAR(40);
SET @name = 'AmitabhBachan';

DECLARE @init INT;   -- position from which the next search starts
SET @init = 1;

DECLARE @hold INT;   -- position of the hit found in the current iteration
DECLARE @table TABLE (POSITION INT);

WHILE @init <= LEN(@name)
BEGIN
    SET @hold = CHARINDEX(@search, @name, @init);

    -- CHARINDEX returns 0 when there is no further occurrence.
    IF @hold = 0
        BREAK;

    INSERT INTO @table (POSITION) VALUES (@hold);
    SET @init = @hold + 1;
END;

SELECT POSITION FROM @table;

如果缩进和大小写保持一致,这段代码会好读得多。再用几句话解释一下所用的方法和实现,也会很有帮助。
格林
By using our site, you acknowledge that you have read and understand our Cookie Policy and Privacy Policy.
Licensed under cc by-sa 3.0 with attribution required.