Guys,
Good afternoon!

Today I will show you how to remove HTML tags from a string in SQL Server using just T-SQL. This function was very useful for me, as I needed to include a column in a report that was exported to XLS (Excel), but this column was the HTML description of the calls generated by the system and in Excel, that bunch of HTML tags didn't look very good. It was really bad to read... That's when I developed this function and presented it to you.

Source code of the fncRemove_HTML function:

CREATE FUNCTION [dbo].[fncRemove_HTML] (@HTMLText VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN

    DECLARE	@Start INT
    DECLARE	@End INT
    DECLARE	@Length INT

    -- Substitui a entidade HTML "&" pelo caracter '&'
    SET @Start = CHARINDEX('&', @HTMLText)
    SET @End = @Start + 4
    SET @Length = (@End - @Start) + 1

    WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
    BEGIN
        SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '&')
        SET @Start = CHARINDEX('&', @HTMLText)
        SET @End = @Start + 4
        SET @Length = (@End - @Start) + 1
    END

    -- Substitui a entidade HTML "<" pelo caracter '<'
    SET @Start = CHARINDEX('<', @HTMLText)
    SET @End = @Start + 3
    SET @Length = (@End - @Start) + 1

    WHILE (@Start > 0 AND @End > 0 AND @Length > 0) 
    BEGIN
        SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '<')
        SET @Start = CHARINDEX('<', @HTMLText)
        SET @End = @Start + 3
        SET @Length = (@End - @Start) + 1
    END

    -- Substitui a entidade HTML ">" pelo caracter '>'
    SET @Start = CHARINDEX('>', @HTMLText)
    SET @End = @Start + 3
    SET @Length = (@End - @Start) + 1

    WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
    BEGIN
        SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '>')
        SET @Start = CHARINDEX('>', @HTMLText)
        SET @End = @Start + 3
        SET @Length = (@End - @Start) + 1
    END

    -- Substitui a entidade HTML "&&" pelo caracter '&'
    SET @Start = CHARINDEX('&amp;', @HTMLText)
    SET @End = @Start + 4
    SET @Length = (@End - @Start) + 1

    WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
    BEGIN
        SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '&')
        SET @Start = CHARINDEX('&amp;', @HTMLText)
        SET @End = @Start + 4
        SET @Length = (@End - @Start) + 1
    END

    -- Substitui a entidade HTML " " pelo caracter ' '
    SET @Start = CHARINDEX(' ', @HTMLText)
    SET @End = @Start + 5
    SET @Length = (@End - @Start) + 1

    WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
    BEGIN
        SET @HTMLText = STUFF(@HTMLText, @Start, @Length, ' ')
        SET @Start = CHARINDEX(' ', @HTMLText)
        SET @End = @Start + 5
        SET @Length = (@End - @Start) + 1
    END

    -- Substitui a tag <br> pela sequência de nova linha (CHR(13) + CHR(10))
    SET @Start = CHARINDEX('<br>', @HTMLText)
    SET @End = @Start + 3
    SET @Length = (@End - @Start) + 1

    WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
    BEGIN
        SET @HTMLText = STUFF(@HTMLText, @Start, @Length, CHAR(13) + CHAR(10))
        SET @Start = CHARINDEX('<br>', @HTMLText)
        SET @End = @Start + 3
        SET @Length = (@End - @Start) + 1
    END


    -- Substitui a tag <br/> pela sequência de nova linha (CHR(13) + CHR(10))
    SET @Start = CHARINDEX('<br/>', @HTMLText)
    SET @End = @Start + 4
    SET @Length = (@End - @Start) + 1

    WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
    BEGIN
        SET @HTMLText = STUFF(@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
        SET @Start = CHARINDEX('<br/>', @HTMLText)
        SET @End = @Start + 4
        SET @Length = (@End - @Start) + 1
    END

    -- Substitui a tag <br /> pela sequência de nova linha (CHR(13) + CHR(10))
    SET @Start = CHARINDEX('<br />', @HTMLText)
    SET @End = @Start + 5
    SET @Length = (@End - @Start) + 1

    WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
    BEGIN
        SET @HTMLText = STUFF(@HTMLText, @Start, @Length, 'CHAR(13) + CHAR(10)')
        SET @Start = CHARINDEX('<br />', @HTMLText)
        SET @End = @Start + 5
        SET @Length = (@End - @Start) + 1
    END


    -- Remove os parâmetros contidos nas tags HTML
    SET @Start = CHARINDEX('<', @HTMLText)
    SET @End = CHARINDEX('>', @HTMLText, CHARINDEX('<', @HTMLText))
    SET @Length = (@End - @Start) + 1

    WHILE (@Start > 0 AND @End > 0 AND @Length > 0)
    BEGIN
        SET @HTMLText = STUFF(@HTMLText, @Start, @Length, '')
        SET @Start = CHARINDEX('<', @HTMLText)
        SET @End = CHARINDEX('>', @HTMLText, CHARINDEX('<', @HTMLText))
        SET @Length = (@End - @Start) + 1
    END


    RETURN LTRIM(RTRIM(@HTMLText))
	

END

Result:

Source code of the fncRemove_Formatacao_Html function

Another way to remove HTML tags and formatting from a string is to use the fncRemove_Formatacao_Html function, which I demonstrate below.

CREATE FUNCTION [dbo].[fncRemove_Formatacao_Html] (
    @pe_sTexto VARCHAR(MAX)
)
RETURNS VARCHAR(MAX)
AS 
BEGIN 

    DECLARE 
        @v_sTexto         VARCHAR(MAX),
        @v_iPosicaoInicio INT = 0,
        @v_iPosicaoFim    INT = 0,
        @v_iTotalPosicoes INT = 0

    SET @v_sTexto         = @pe_sTexto
    SET @v_sTexto         = REPLACE(@v_sTexto, '<BR><BR><BR>', '<BR>')
    SET @v_sTexto         = REPLACE(@v_sTexto, '<BR><BR>', '<BR>')
    SET @v_sTexto         = REPLACE(@v_sTexto, '<BR>', '. ')	    
    
    WHILE (1 = 1) 
    BEGIN

        SET @v_iPosicaoInicio = PATINDEX('%<%', @v_sTexto)
        SET @v_iPosicaoFim    = ISNULL(NULLIF(PATINDEX('%>%', @v_sTexto), 0), LEN(@v_sTexto))
        SET @v_iTotalPosicoes = ((@v_iPosicaoFim - @v_iPosicaoInicio) + 1)
        
        IF (@v_iPosicaoFim<@v_iPosicaoInicio)
            SET @v_sTexto = SUBSTRING(@v_sTexto, 1, @v_iPosicaoFim-1)+SUBSTRING(@v_sTexto, @v_iPosicaoFim+1, LEN(@v_sTexto))
        ELSE  
            IF (@v_iPosicaoInicio <> 0) 
                SET @v_sTexto = REPLACE(@v_sTexto, SUBSTRING(@v_sTexto, @v_iPosicaoInicio, @v_iTotalPosicoes), '') 
            ELSE
                BREAK;

        SET @v_iPosicaoInicio = 0 
        SET @v_iPosicaoFim    = 0 
        SET @v_iTotalPosicoes = 0 

    END

    SET @v_sTexto = REPLACE(@v_sTexto, '..', '. ')
    SET @v_sTexto = REPLACE(@v_sTexto, ',.', ', ')
    SET @v_sTexto = REPLACE(@v_sTexto, '?.', '? ')
    SET @v_sTexto = REPLACE(@v_sTexto, '!.', '! ')
    SET @v_sTexto = REPLACE(@v_sTexto, ',.', ', ')
    SET @v_sTexto = REPLACE(@v_sTexto, '.. ', '. ')
    SET @v_sTexto = REPLACE(@v_sTexto, ',. ', ', ')
    SET @v_sTexto = REPLACE(@v_sTexto, '?. ', '? ')
    SET @v_sTexto = REPLACE(@v_sTexto, '!. ', '! ')
    SET @v_sTexto = REPLACE(@v_sTexto, ',. ', ', ')
    SET @v_sTexto = REPLACE(@v_sTexto, '.  ', '. ')
    SET @v_sTexto = REPLACE(@v_sTexto, ',  ', ', ')
    SET @v_sTexto = REPLACE(@v_sTexto, '?  ', '? ')
    SET @v_sTexto = REPLACE(@v_sTexto, '!  ', '! ')
    SET @v_sTexto = REPLACE(@v_sTexto, ' . ', '. ')
    SET @v_sTexto = REPLACE(@v_sTexto, '&nbsp', '')
    SET @v_sTexto = RTRIM(@v_sTexto)
    SET @v_sTexto = LTRIM(@v_sTexto)
    

    RETURN @v_sTexto


END

Execution result:

If you want to use this same function, but with the source code in C# and using it in the CLR, for much better performance, see the post SQL Server – How to convert an HTML string to text (Remove HTML tags) using the CLR (C#).

A hug and see you later!