Guys,
Good afternoon!
Today I will show you how to remove HTML tags from a string in SQL Server using just T-SQL. This function was very useful for me, as I needed to include a column in a report that was exported to XLS (Excel), but this column was the HTML description of the calls generated by the system and in Excel, that bunch of HTML tags didn't look very good. It was really bad to read... That's when I developed this function and presented it to you.
Source code of the fncRemove_HTML function:
CREATE FUNCTION [dbo].[fncRemove_HTML] (@HTMLText VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
DECLARE @Start INT
DECLARE @End INT
DECLARE @Length INT
-- Substitui a entidade HTML "&" pelo caracter '&'
SET @Start = CHARINDEX('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '&')
SET @Start = CHARINDEX('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END
-- Substitui a entidade HTML "<" pelo caracter '<'
SET @Start = CHARINDEX('<', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '<')
SET @Start = CHARINDEX('<', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END
-- Substitui a entidade HTML ">" pelo caracter '>'
SET @Start = CHARINDEX('>', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '>')
SET @Start = CHARINDEX('>', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END
-- Substitui a entidade HTML "&&" pelo caracter '&'
SET @Start = CHARINDEX('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '&')
SET @Start = CHARINDEX('&', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END
-- Substitui a entidade HTML " " pelo caracter ' '
SET @Start = CHARINDEX(' ', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, ' ')
SET @Start = CHARINDEX(' ', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
END
-- Substitui a tag <br> pela sequência de nova linha (CHR(13) + CHR(10))
SET @Start = CHARINDEX('<br>', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, CHAR(13) + CHAR(10))
SET @Start = CHARINDEX('<br>', @HTMLText)
SET @End = @Start + 3
SET @Length = (@End - @Start) + 1
END
-- Substitui a tag <br/> pela sequência de nova linha (CHR(13) + CHR(10))
SET @Start = CHARINDEX('<br/>', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
SET @Start = CHARINDEX('<br/>', @HTMLText)
SET @End = @Start + 4
SET @Length = (@End - @Start) + 1
END
-- Substitui a tag <br /> pela sequência de nova linha (CHR(13) + CHR(10))
SET @Start = CHARINDEX('<br />', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
SET @Start = CHARINDEX('<br />', @HTMLText)
SET @End = @Start + 5
SET @Length = (@End - @Start) + 1
END
-- Remove os parâmetros contidos nas tags HTML
SET @Start = CHARINDEX('<', @HTMLText)
SET @End = CHARINDEX('>', @HTMLText, CHARINDEX('<', @HTMLText))
SET @Length = (@End - @Start) + 1
WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
BEGIN
SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '')
SET @Start = CHARINDEX('<', @HTMLText)
SET @End = CHARINDEX('>', @HTMLText, CHARINDEX('<', @HTMLText))
SET @Length = (@End - @Start) + 1
END
RETURN LTRIM(RTRIM(@HTMLText))
END
Source code of the fncRemove_Formatacao_Html function
Another way to remove HTML tags and formatting from a string is to use the fncRemove_Formatacao_Html function, which I demonstrate below.
CREATE FUNCTION [dbo].[fncRemove_Formatacao_Html] (
@pe_sTexto VARCHAR(MAX)
)
RETURNS VARCHAR(MAX)
AS
BEGIN
DECLARE
@v_sTexto VARCHAR(MAX),
@v_iPosicaoInicio INT = 0,
@v_iPosicaoFim INT = 0,
@v_iTotalPosicoes INT = 0
SET @v_sTexto = @pe_sTexto
SET @v_sTexto = REPLACE(@v_sTexto, '<BR><BR><BR>', '<BR>')
SET @v_sTexto = REPLACE(@v_sTexto, '<BR><BR>', '<BR>')
SET @v_sTexto = REPLACE(@v_sTexto, '<BR>', '. ')
WHILE (1 = 1)
BEGIN
SET @v_iPosicaoInicio = PATINDEX('%<%', @v_sTexto)
SET @v_iPosicaoFim = ISNULL(NULLIF(PATINDEX('%>%', @v_sTexto), 0), LEN(@v_sTexto))
SET @v_iTotalPosicoes = ((@v_iPosicaoFim - @v_iPosicaoInicio) + 1)
IF (@v_iPosicaoFim<@v_iPosicaoInicio)
SET @v_sTexto = SUBSTRING(@v_sTexto, 1, @v_iPosicaoFim-1)+SUBSTRING(@v_sTexto, @v_iPosicaoFim+1, LEN(@v_sTexto))
ELSE
IF (@v_iPosicaoInicio <> 0)
SET @v_sTexto = REPLACE(@v_sTexto, SUBSTRING(@v_sTexto, @v_iPosicaoInicio, @v_iTotalPosicoes), '')
ELSE
BREAK;
SET @v_iPosicaoInicio = 0
SET @v_iPosicaoFim = 0
SET @v_iTotalPosicoes = 0
END
SET @v_sTexto = REPLACE(@v_sTexto, '..', '. ')
SET @v_sTexto = REPLACE(@v_sTexto, ',.', ', ')
SET @v_sTexto = REPLACE(@v_sTexto, '?.', '? ')
SET @v_sTexto = REPLACE(@v_sTexto, '!.', '! ')
SET @v_sTexto = REPLACE(@v_sTexto, ',.', ', ')
SET @v_sTexto = REPLACE(@v_sTexto, '.. ', '. ')
SET @v_sTexto = REPLACE(@v_sTexto, ',. ', ', ')
SET @v_sTexto = REPLACE(@v_sTexto, '?. ', '? ')
SET @v_sTexto = REPLACE(@v_sTexto, '!. ', '! ')
SET @v_sTexto = REPLACE(@v_sTexto, ',. ', ', ')
SET @v_sTexto = REPLACE(@v_sTexto, '. ', '. ')
SET @v_sTexto = REPLACE(@v_sTexto, ', ', ', ')
SET @v_sTexto = REPLACE(@v_sTexto, '? ', '? ')
SET @v_sTexto = REPLACE(@v_sTexto, '! ', '! ')
SET @v_sTexto = REPLACE(@v_sTexto, ' . ', '. ')
SET @v_sTexto = REPLACE(@v_sTexto, ' ', '')
SET @v_sTexto = RTRIM(@v_sTexto)
SET @v_sTexto = LTRIM(@v_sTexto)
RETURN @v_sTexto
END
If you want to use this same function, but with the source code in C# and using it in the CLR, for much better performance, see the post SQL Server – How to convert an HTML string to text (Remove HTML tags) using the CLR (C#).
A hug and see you later!


Comentários (0)
Carregando comentários…