您可以使用如下的 SQL 函数:
ALTER FUNCTION [dbo].[StripOutHTML]
(
@HTMLText VARCHAR(max),
@stripDisallowedOnly BIT
)
returns VARCHAR(max)
AS
BEGIN
    -- Purpose: convert an HTML fragment to plain text.
    --   1. Decode the entities &amp;, &lt;, &gt; and &nbsp;.
    --   2. Turn <P>, </P>, <br>, <br/> and <br /> into a CR/LF pair.
    --   3. Remove every remaining '<' ... '>' span.
    --   4. Trim leading tab/LF/CR/space and surrounding spaces.
    --
    -- Parameters:
    --   @HTMLText            text to clean; NULL in gives NULL out.
    --   @stripDisallowedOnly when 1, tags whose name starts with B, U or I
    --                        (opening or closing) are kept; any other value,
    --                        including NULL, removes every tag.
    --
    -- Returns: the cleaned text.
    --
    -- String matching follows the collation of @HTMLText, so whether '<P>'
    -- also matches '<p>' depends on that collation.
    DECLARE @Start INT
    DECLARE @End INT
    DECLARE @Length INT
    DECLARE @NewLine CHAR(2)
    DECLARE @trimchars VARCHAR(10)
    DECLARE @FirstKept INT

    SET @NewLine = CHAR(13) + CHAR(10)

    -- Decode '&amp;' first, repeating until none is left, so that multiply
    -- encoded input such as '&amp;amp;lt;' ends up as '&lt;' before the
    -- other entities are decoded. Decoding the other entities afterwards
    -- cannot create a new '&amp;', so one fixed-point pass is enough.
    WHILE CHARINDEX('&amp;', @HTMLText) > 0
    BEGIN
        SET @HTMLText = REPLACE(@HTMLText, '&amp;', '&')
    END

    -- Decode the remaining supported entities.
    SET @HTMLText = REPLACE(@HTMLText, '&lt;', '<')
    SET @HTMLText = REPLACE(@HTMLText, '&gt;', '>')
    SET @HTMLText = REPLACE(@HTMLText, '&nbsp;', ' ')

    -- Paragraph tags become <br> so the next step turns them into new lines.
    SET @HTMLText = REPLACE(@HTMLText, '<P>', '<br>')
    SET @HTMLText = REPLACE(@HTMLText, '</P>', '<br>')

    -- Every line-break spelling becomes a real CR/LF pair.
    SET @HTMLText = REPLACE(@HTMLText, '<br>', @NewLine)
    SET @HTMLText = REPLACE(@HTMLText, '<br/>', @NewLine)
    SET @HTMLText = REPLACE(@HTMLText, '<br />', @NewLine)

    -- Remove the remaining tags: each span from a '<' to the next '>'.
    SET @Start = CHARINDEX('<', @HTMLText)
    SET @End = CHARINDEX('>', @HTMLText, @Start)
    SET @Length = (@End - @Start) + 1
    WHILE (@Start > 0
           AND @End > 0
           AND @Length > 0)
    BEGIN
        IF @stripDisallowedOnly = 1
        BEGIN
            -- NOTE: this is a prefix test, so it also keeps any tag whose
            -- name merely starts with B, U or I (e.g. <BODY>, <UL>, <IMG>).
            IF (Upper(Substring(@HTMLText, @Start, 2)) <> '<B')
               AND (Upper(Substring(@HTMLText, @Start, 3)) <> '</B')
               AND (Upper(Substring(@HTMLText, @Start, 2)) <> '<U')
               AND (Upper(Substring(@HTMLText, @Start, 3)) <> '</U')
               AND (Upper(Substring(@HTMLText, @Start, 2)) <> '<I')
               AND (Upper(Substring(@HTMLText, @Start, 3)) <> '</I')
            BEGIN
                SET @HTMLText = Stuff(@HTMLText, @Start, @Length, '')
            END
            ELSE
            BEGIN
                -- Tag is kept: zero the length so the next search resumes
                -- at this tag's '>' instead of before the tag.
                SET @Length = 0
            END
        END
        ELSE
        BEGIN
            SET @HTMLText = Stuff(@HTMLText, @Start, @Length, '')
        END

        -- After a removal @End - @Length is the position just before the
        -- removed tag; after a kept tag it is that tag's '>'.
        SET @Start = CHARINDEX('<', @HTMLText, @End - @Length)
        SET @End = CHARINDEX('>', @HTMLText, @Start)
        SET @Length = (@End - @Start) + 1
    END

    -- Remove leading tab / line feed / carriage return / space.
    SET @trimchars = CHAR(9) + CHAR(10) + CHAR(13) + CHAR(32)
    IF @HTMLText LIKE '[' + @trimchars + ']%'
    BEGIN
        SET @FirstKept = PATINDEX('%[^' + @trimchars + ']%', @HTMLText)
        IF @FirstKept > 0
            SET @HTMLText = SUBSTRING(@HTMLText, @FirstKept, LEN(@HTMLText))
        ELSE
            -- Nothing but trim characters: SUBSTRING from position 0 would
            -- leave part of the whitespace behind, so return an empty string.
            SET @HTMLText = ''
    END

    RETURN Ltrim(Rtrim(@HTMLText))
END
您需要修改以下几行,才能保留 em 之类的标签:(Upper(Substring(@HTMLText, @Start, 2)) <> '<B')
注意:该 SQL 函数会把 <BR> 和 <P> 标签替换为换行符;如果不需要这种行为,可以很容易地删除相关代码行。希望这能帮到您,或为您指明正确的方向。
-- Excerpt of the tag-stripping loop. @HTMLText, @stripDisallowedOnly, @Start,
-- @End and @Length are declared and initialised outside this fragment.
-- The loop runs while a '<' and a following '>' are still found.
WHILE (@Start > 0
AND @End > 0
AND @Length > 0)
BEGIN
IF @stripDisallowedOnly = 1
BEGIN
-- Keep a tag when its first characters are '<B', '</B', '<U', '</U', '<I'
-- or '</I'. This is a prefix test, so longer tag names starting with those
-- letters also match. Add further comparisons here to keep other tags.
IF (Upper(Substring(@HTMLText, @Start, 2)) <> '<B')
AND (Upper(Substring(@HTMLText, @Start, 3)) <> '</B')
AND (Upper(Substring(@HTMLText, @Start, 2)) <> '<U')
AND (Upper(Substring(@HTMLText, @Start, 3)) <> '</U')
AND (Upper(Substring(@HTMLText, @Start, 2)) <> '<I')
AND (Upper(Substring(@HTMLText, @Start, 3)) <> '</I')
BEGIN
-- Not in the keep list: delete the whole '<' ... '>' span.
SET @HTMLText = Stuff(@HTMLText, @Start, @Length, '')
END
ELSE
BEGIN
-- Tag is kept: zero the length so the next search starts at this tag's '>'.
SET @Length = 0
END
END
ELSE
BEGIN
-- Any other flag value: delete every tag.
SET @HTMLText = Stuff(@HTMLText, @Start, @Length, '')
END
-- After a deletion @End - @Length is the position just before the removed
-- tag; after a kept tag it is the position of that tag's '>'.
SET @Start = Charindex('<', @HTMLText, @End - @Length)
SET @End = Charindex('>', @HTMLText, Charindex('<', @HTMLText,
@Start)
)
SET @Length = (@End - @Start) + 1
END
这能处理格式不良的 HTML 吗?我需要删除像 adsf 这样没有加引号、也没有 HREF 的属性 – tsdexter 2012-04-19 20:08:02
SQL Server 并不适合做这样的事情,但我敢肯定您可以先对输入的 HTML 字符串应用 **HTML Tidy**,然后再提交它进行处理 – 2012-04-19 20:12:29
因为这是用新数据替换旧的错误数据的工作的一部分,而且旧数据非常有规律(即:HTML 标签全部大写,而我想保留的标签是小写),所以我使用 PATINDEX 而不是 CHARINDEX 更轻松地做到了这一点——请参阅我的答案。 – tsdexter 2012-04-19 20:26:02