// Sample HTML that Page_Load filters in place.
protected string str = "<table><tr><td>sdasasdsdd</td></tr></table><br><p>sds</p><img id='img1' src='http://www.zhixing123.cn/uploads/allimg/110330/1104201G0-0.gif' width='100' height='50' alt=''>aaassss<br><img src='http://www.zhixing123.cn/uploads/allimg/110330/1104201G0-0.gif' width='100' height='50' alt=''> 说是道 ";

/// <summary>
/// Strips every HTML tag from <c>str</c> except img, br, p and /p,
/// and stores the result back into <c>str</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Alternative patterns, kept for reference:
    //   @"<[^>]*>"                      strips every tag
    //   @"<script[^>]*?>.*?</script >"  strips all scripts, including the text between the tags
    //   @"<img[^>]*>"                   strips images
    //   @"<(?!br).*?>"                  strips every tag except br
    //   @"<table[^>]*?>.*?</table>"     strips tables together with everything inside them

    // Strip every tag except img, br, p and /p.
    // The \b stops the look-ahead from also sparing tags that merely start
    // with one of those names (e.g. <pre>, <param>), and [^>]* (unlike .*?)
    // also matches tags that span several lines.
    string regexstr = @"<(?!(?:img|br|p|/p)\b)[^>]*>";
    str = Regex.Replace(str, regexstr, string.Empty, RegexOptions.IgnoreCase);
}

asp中正则表达式去除HTML标记(窃自eWebEditor) 2009年12月31日 星期四 下午 12:40

' Replaces every match of the pattern "re" in "content" with "replacement"
' and returns the resulting string. "replacement" may reference captured
' groups as $1, $2, ...
' Matching is case-sensitive (IgnoreCase is left at its default).
Function ExecRegReplace(re, replacement, content)
    Dim myRegExp
    Set myRegExp = New RegExp
    myRegExp.Global = True
    myRegExp.Pattern = re
    ExecRegReplace = myRegExp.Replace(content, replacement)
    Set myRegExp = Nothing
End Function

' Deletes every match of the pattern "re" from "content" and returns the result.
Function ExecReg(re, content)
    ExecReg = ExecRegReplace(re, "", content)
End Function

' Returns "html" lower-cased and with scripts, layout/formatting tags,
' class/style attributes, XML declarations, namespaced tags, fonts,
' marquees and embedded objects removed.
'
' NOTE(review): LCase lower-cases the whole document, including visible text
' and attribute values such as URLs. The argument is also reassigned inside
' the function, so a variable passed by reference is modified for the caller.
Function DecodeFilter(html)
    Dim tagNames, tagName

    html = LCase(html)

    ' Remove all client-side script: javascript, vbscript, jscript, js, vbs, event handlers.
    html = ExecReg("</?script\b[^>]*>", html)
    html = ExecReg("(javascript|jscript|vbscript|vbs):", html)
    ' NOTE(review): this matches anywhere in the text, not only inside tags
    ' (e.g. the "onkey" in "monkey").
    html = ExecReg("on(mouse|exit|error|click|key)", html)
    html = ExecReg("&#", html)

    ' Remove table, link, paragraph, image, block, list, heading and bold tags.
    ' The \b keeps a short name from matching longer tags that start with it
    ' (e.g. "b" must not strip <br>, "a" must not strip <abbr>).
    tagNames = Array("table", "tr", "th", "td", "a", "p", "img", "div", _
                     "ul", "li", "tbody", "h1", "h2", "h3", "h4", "h5", "h6", _
                     "b", "strong")
    For Each tagName In tagNames
        html = ExecReg("</?" & tagName & "\b[^>]*>", html)
    Next

    ' Remove class="" attributes; $1$2 keeps the rest of the tag.
    html = ExecRegReplace("(<[^>]+) class=[^ |^>]*([^>]*>)", "$1$2", html)
    ' Remove style="" attributes; $1$2 keeps the rest of the tag.
    html = ExecRegReplace("(<[^>]+) style=""[^""]*""([^>]*>)", "$1$2", html)
    ' Remove XML declarations <?xml>.
    html = ExecReg("<\?xml[^>]*>", html)
    ' Remove namespaced tags such as <o:p></o:p>.
    html = ExecReg("</?[a-z]+:[^>]*>", html)
    ' Remove <font></font>.
    html = ExecReg("</?font\b[^>]*>", html)
    ' Remove <marquee></marquee>.
    html = ExecReg("</?marquee\b[^>]*>", html)
    ' Remove <object><param><embed></object>.
    html = ExecReg("</?object\b[^>]*>", html)
    html = ExecReg("</?param\b[^>]*>", html)
    html = ExecReg("</?embed\b[^>]*>", html)

    DecodeFilter = html
End Function

' Returns strText with every <...> tag removed.
Function RemoveHTML(strText)
    Dim RegEx
    Set RegEx = New RegExp
    RegEx.Pattern = "<[^>]*>"
    RegEx.Global = True
    RemoveHTML = RegEx.Replace(strText, "")
End Function

' Returns str with tags replaced by a space and then all spaces removed.
Function nohtml(str)
    Dim re
    Set re = New RegExp
    re.IgnoreCase = True
    re.Global = True

    ' Opening (and any other) tags.
    re.Pattern = "(\<.[^\<]*\>)"
    str = re.Replace(str, " ")
    ' Closing tags.
    re.Pattern = "(\<\/[^\<]*\>)"
    str = re.Replace(str, " ")

    ' NOTE(review): this strips every space, including the ones inserted
    ' above; presumably it was meant to target &nbsp; -- confirm.
    str = Replace(str, " ", "")
    ' NOTE(review): the source had a further Replace call with only two
    ' arguments, which VBScript rejects at run time. Its intended search
    ' text is not recoverable, so the call is omitted.

    nohtml = str
    Set re = Nothing
End Function
注:在 Java 中,可以用 "html内容".replaceAll("<[^>]*>", "") 去除所有 HTML 标签。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。