So here it is:
IF OBJECT_ID (N'dbo.StripHTML', N'FN') IS NOT NULL
DROP FUNCTION StripHTML;
GO
CREATE FUNCTION dbo.StripHTML(@TextToClean nvarchar(max))
RETURNS NVARCHAR(max)
AS
BEGIN
--This function cleans out html tags and some special html characters
--20190129
--author: lyle.spencer@gmail.com
--original code: https://www.sqlservercentral.com/Forums/932589/Extracting-text-from-html-stored-in-SQL-table
-- To get the list of Tags removed....
Declare @TextToCleanTagNameTable Table(Tag nvarchar(max))
Declare @TextToCleanCharI1 int -- Used to locate '<'
Declare @TextToCleanCharI2 int -- Used to locate '>' to get Tag Name
Declare @TextToCleanCharICheck int -- Used to Check for Tag Name
Declare @TextToCleanEndTagName nvarchar(1000)
Declare @TextToCleanStartPos int
Set @TextToCleanStartPos = 0
While 1=1
begin
Set @TextToCleanCharI1 = CharIndex('<',@TextToClean,@TextToCleanStartPos)
if @TextToCleanCharI1 > 0
begin
-- Get the Tag Name, if it is a HTML Tag
Set @TextToCleanCharI2 = CharIndex('>',@TextToClean,@TextToCleanCharI1 + 1)
if @TextToCleanCharI2 > 0
begin
-- Check <>
if @TextToCleanCharI2 = @TextToCleanCharI1 + 1
begin
Set @TextToCleanStartPos = @TextToCleanCharI2 + 1
end
else
begin
Set @TextToCleanCharICheck = CharIndex('<',@TextToClean,@TextToCleanCharI1 + 1)
if (@TextToCleanCharICheck < @TextToCleanCharI2)
begin
-- Last < found was not start of tag. Just < character / sign
if @TextToCleanCharICheck > 0
Set @TextToCleanStartPos = @TextToCleanCharICheck
else
begin
-- @TextToCleanCharICheck = 0
-- Its a Last TAG...
Set @TextToCleanEndTagName = SubString(@TextToClean,@TextToCleanCharI1,(@TextToCleanCharI2-@TextToCleanCharI1) + 1)
if Not Exists(Select 1 from @TextToCleanTagNameTable where Tag = @TextToCleanEndTagName)
Set @TextToClean = Replace(@TextToClean,@TextToCleanEndTagName,'')
Set @TextToCleanStartPos = @TextToCleanCharI2
end
if @TextToCleanStartPos = Len(@TextToClean)
begin
--Select @TextToCleanStartPos , Len(@TextToClean),@TextToCleanCharICheck,@TextToCleanCharI1
--print 'Over Here....'
BREAK
end
end
else
begin
-- ITS A TAG....
Set @TextToCleanEndTagName = SubString(@TextToClean,@TextToCleanCharI1,(@TextToCleanCharI2-@TextToCleanCharI1) + 1)
if Not Exists(Select 1 from @TextToCleanTagNameTable where Tag = @TextToCleanEndTagName)
begin
Set @TextToClean = Replace(@TextToClean,@TextToCleanEndTagName,'')
end
--Set @TextToCleanStartPos = @TextToCleanCharI2 + 1
Set @TextToCleanStartPos = @TextToCleanCharI1
if @TextToCleanStartPos > Len(@TextToClean)
begin
--print 'Here....'
BREAK
end
end
end
end
else
BREAK -- No Tag
end
else
BREAK
end
RETURN @TextToClean
END;
GO
--test me
Select dbo.stripHTML('<test1> test2')
No comments:
Post a Comment