将特殊字符转换为Javascript中的HTML


Answers:


77

您需要一个功能类似于

return mystring.replace(/&/g, "&amp;").replace(/>/g, "&gt;").replace(/</g, "&lt;").replace(/"/g, "&quot;");

但是要考虑到您希望对单/双引号进行不同处理的愿望。


斜杠g有什么作用?
JohnnyBizzle

5
@JohnnyBizzle /g在正则表达式中表示“全局”。简而言之,字符串中所有出现的匹配项都将被替换。如果没有/g,则只有第一个匹配项会被替换。
凯文·金贝尔

207

我认为最好的方法是使用浏览器内置的HTML转义功能来处理许多情况。为此,只需在DOM树中创建一个元素,并将该元素的innerText设置为您的字符串,然后读取该元素的innerHTML。浏览器将返回HTML编码的字符串。

/**
 * HTML-encodes a string by letting the browser serialize it: the text is
 * stored in a detached <div> and read back through innerHTML.
 *
 * @param {string} s - raw text to encode
 * @returns {string} the div's innerHTML after the text was assigned
 */
function HtmlEncode(s)
{
  var holder = document.createElement("div");
  // Both properties are written (textContent first, then innerText) so the
  // text is stored regardless of which one the browser supports.
  holder.textContent = s;
  holder.innerText = s;
  return holder.innerHTML;
}

测试运行:

alert(HtmlEncode('&;\'><"'));

输出:

&amp;;'&gt;&lt;"

Prototype JS库也使用这种转义HTML的方法,尽管与我给出的简单示例有所不同。

注意:您仍然需要自己对引号(双引号和单引号)进行转义。您可以在此处使用其他人概述的任何方法。


3
请注意,这delete el是一个错误。perfectionkills.com/understanding-delete
gblazex 2011年

当我尝试时,这对我没有任何帮助。我把字符恢复不变。
莫斯

1
抱歉,我正在测试奇数字符,再加上Chrome有点偷偷摸摸,没有显示真正的HTML输出,但是Firebug却显示了(实际上,当生成的源未对其进行编码时,它显示了版权符号的html实体)。这确实可以用,<>&但是不像Neotropic或KooiInc的解决方案那样全面。
莫斯,

20
与jQuery,output = $('<div>').text(input).html()
2012年

6
两种方法都不会将'转换为&apos;,也不会将"转换为&quot;,因此仍然可能被用于XSS攻击。
有人

32

此泛型函数将每个非字母字符编码为其htmlcode(数字):

/**
 * Encodes characters as decimal numeric character references (`&#NNN;`).
 *
 * A character is encoded when its code point is below 65, above 127, or
 * between 91 and 96; A-Z, a-z and the code points 123-127 are copied as-is.
 * The string is walked by code point, so a character outside the Basic
 * Multilingual Plane yields one reference instead of two surrogate halves.
 *
 * @param {string} str - text to encode
 * @returns {string} the encoded text
 */
function HTMLEncode(str) {
    var aRet = [];

    // for...of iterates a string by code point, keeping surrogate pairs together.
    for (var ch of str) {
        var iC = ch.codePointAt(0);
        if (iC < 65 || iC > 127 || (iC > 90 && iC < 97)) {
            aRet.push('&#' + iC + ';');
        } else {
            aRet.push(ch);
        }
    }
    return aRet.join('');
}

这听起来确实很聪明,但我只能将其转换为基本知识:<>&
Moss

嗯,它可以在控制台中正常运行,但是当您输出到浏览器时,它似乎并没有转换内容。这是怎么回事?
莫斯,

@Moss:浏览器将html编码的字符呈现为其表示的字符。html编码字符的优点是浏览器不必猜测(例如)变音字符的翻译,因此总是像渲染那些字符一样呈现这些字符。
KooiInc 2011年

您可能会考虑更改此设置以从str中删除类似数组的访问。IE7及以下版本不支持此功能,您可以像使用i作为参数一样在str的右边直接调用charCodeAt。var iC = str.charCodeAt(i)
Chase 2012年

此代码无法为±字符生成正确的HTML实体值,该值应为&#177;。但它正在返回&#65533; 这是一个未知字符 。
Paul

21

从Mozilla ...

请注意,charCodeAt将始终返回小于65,536的值。这是因为较高的代码点由一对(较低值的)“替代”伪字符表示,这些伪字符用于构成实字符。因此,为了检查或重现值大于等于65,536的单个字符的完整字符,对于此类字符,不仅要检索charCodeAt(i),而且还要检索charCodeAt(i + 1)(就像检查/生成带有两个>字母的字符串)。

最好的解决方案

/**
 * (c) 2012 Steven Levithan <http://slevithan.com/>
 * MIT license
 */
// Fallback for engines without a native String.prototype.codePointAt.
if (!String.prototype.codePointAt) {
    String.prototype.codePointAt = function (pos) {
        // A position that is not a number is treated as 0.
        var index = isNaN(pos) ? 0 : pos;
        var text = String(this);
        var lead = text.charCodeAt(index);
        var trail = text.charCodeAt(index + 1);
        var leadIsHigh = lead >= 0xD800 && lead <= 0xDBFF;
        var trailIsLow = trail >= 0xDC00 && trail <= 0xDFFF;
        if (!(leadIsHigh && trailIsLow)) {
            // Not a surrogate pair: the UTF-16 unit is the result.
            return lead;
        }
        // Combine the high and low surrogate into one code point.
        return ((lead - 0xD800) * 0x400) + (trail - 0xDC00) + 0x10000;
    };
}

/**
 * Encodes every character above code point 127 as a decimal numeric
 * character reference (`&#NNN;`); ASCII characters are copied unchanged.
 *
 * @param {string} string - text to encode
 * @return {string} the encoded text
 */
function html_encode(string) {
    var ret_val = '';
    for (var i = 0; i < string.length; i++) {
        var codePoint = string.codePointAt(i);
        if (codePoint > 127) {
            ret_val += '&#' + codePoint + ';';
            // A code point above 0xFFFF occupies two UTF-16 units; step over
            // the low surrogate so it is not emitted as a second reference.
            if (codePoint > 0xFFFF) {
                i++;
            }
        } else {
            ret_val += string.charAt(i);
        }
    }
    return ret_val;
}

用法示例:

html_encode("✈");

21

创建一个使用字符串的函数 replace

/**
 * Escapes the five HTML-significant characters & > < " ' in a string.
 *
 * @param {string} str - text to escape
 * @returns {string} the escaped text
 */
function convert(str)
{
  // The ampersand must stay first so the entities produced by the later
  // replacements are not escaped a second time.
  var substitutions = [
    [/&/g, "&amp;"],
    [/>/g, "&gt;"],
    [/</g, "&lt;"],
    [/"/g, "&quot;"],
    [/'/g, "&#039;"]
  ];
  return substitutions.reduce(function (text, rule) {
    return text.replace(rule[0], rule[1]);
  }, str);
}

我遇到的问题是:只要用户在输入值中添加单引号(')或双引号("),显示出来的HTML脚本就会被破坏。
Dharam马里

13

对于那些想要像字符串内部那样解码整数字符代码的人&#xxx;,请使用以下函数:

/**
 * Decodes decimal numeric character references (`&#NNN;`) in a string.
 *
 * Uses String.fromCodePoint so that references above 0xFFFF decode to the
 * intended character. References beyond the Unicode range (0x10FFFF) are
 * left in the text unchanged.
 *
 * @param {string} str - text containing `&#NNN;` references
 * @returns {string} the text with the references replaced by their characters
 */
function decodeHtmlCharCodes(str) { 
  return str.replace(/&#(\d+);/g, function(match, digits) {
    var codePoint = parseInt(digits, 10);
    // fromCodePoint throws a RangeError past 0x10FFFF, so keep the raw text.
    if (codePoint > 0x10FFFF) {
      return match;
    }
    return String.fromCodePoint(codePoint);
  });
}

// Will output "The show that gained int’l reputation’!"
console.log(decodeHtmlCharCodes('The show that gained int&#8217;l reputation&#8217;!'));

ES6

/**
 * Decodes decimal numeric character references (`&#NNN;`) in a string.
 *
 * Uses String.fromCodePoint so that references above 0xFFFF decode to the
 * intended character. References beyond the Unicode range (0x10FFFF) are
 * left in the text unchanged.
 *
 * @param {string} str - text containing `&#NNN;` references
 * @returns {string} the text with the references replaced by their characters
 */
const decodeHtmlCharCodes = str =>
  str.replace(/&#(\d+);/g, (match, digits) => {
    const codePoint = Number.parseInt(digits, 10);
    // fromCodePoint throws a RangeError past 0x10FFFF, so keep the raw text.
    return codePoint > 0x10FFFF ? match : String.fromCodePoint(codePoint);
  });

// Will output "The show that gained int’l reputation’!"
console.log(decodeHtmlCharCodes('The show that gained int&#8217;l reputation&#8217;!'));


3
这应该是可接受的答案,因为这将解码所有内容。
Quesofat

这是我一直在寻找的答案。谢谢。
Dzenis H.

8
/**
 * Replaces special characters in the `value` of every object passed in
 * (typically form fields) with the HTML entity listed for them below.
 *
 * `chars` and `codes` are matched by index. Some characters appear twice in
 * `chars`; because every occurrence is replaced on the first hit, the first
 * listed entity is the one that is used. Arguments that have no string
 * `value` are skipped instead of throwing.
 *
 * @param {...{value: string}} fields - objects whose `value` is rewritten in place
 * @returns {void}
 */
function char_convert(...fields) {

    const chars = ["©","Û","®","ž","Ü","Ÿ","Ý","$","Þ","%","¡","ß","¢","à","£","á","À","¤","â","Á","¥","ã","Â","¦","ä","Ã","§","å","Ä","¨","æ","Å","©","ç","Æ","ª","è","Ç","«","é","È","¬","ê","É","­","ë","Ê","®","ì","Ë","¯","í","Ì","°","î","Í","±","ï","Î","²","ð","Ï","³","ñ","Ð","´","ò","Ñ","µ","ó","Õ","¶","ô","Ö","·","õ","Ø","¸","ö","Ù","¹","÷","Ú","º","ø","Û","»","ù","Ü","@","¼","ú","Ý","½","û","Þ","€","¾","ü","ß","¿","ý","à","‚","À","þ","á","ƒ","Á","ÿ","å","„","Â","æ","…","Ã","ç","†","Ä","è","‡","Å","é","ˆ","Æ","ê","‰","Ç","ë","Š","È","ì","‹","É","í","Œ","Ê","î","Ë","ï","Ž","Ì","ð","Í","ñ","Î","ò","‘","Ï","ó","’","Ð","ô","“","Ñ","õ","”","Ò","ö","•","Ó","ø","–","Ô","ù","—","Õ","ú","˜","Ö","û","™","×","ý","š","Ø","þ","›","Ù","ÿ","œ","Ú"];
    // "&#252;" was missing its terminating semicolon in the original table.
    const codes = ["&copy;","&#219;","&reg;","&#158;","&#220;","&#159;","&#221;","&#36;","&#222;","&#37;","&#161;","&#223;","&#162;","&#224;","&#163;","&#225;","&Agrave;","&#164;","&#226;","&Aacute;","&#165;","&#227;","&Acirc;","&#166;","&#228;","&Atilde;","&#167;","&#229;","&Auml;","&#168;","&#230;","&Aring;","&#169;","&#231;","&AElig;","&#170;","&#232;","&Ccedil;","&#171;","&#233;","&Egrave;","&#172;","&#234;","&Eacute;","&#173;","&#235;","&Ecirc;","&#174;","&#236;","&Euml;","&#175;","&#237;","&Igrave;","&#176;","&#238;","&Iacute;","&#177;","&#239;","&Icirc;","&#178;","&#240;","&Iuml;","&#179;","&#241;","&ETH;","&#180;","&#242;","&Ntilde;","&#181;","&#243;","&Otilde;","&#182;","&#244;","&Ouml;","&#183;","&#245;","&Oslash;","&#184;","&#246;","&Ugrave;","&#185;","&#247;","&Uacute;","&#186;","&#248;","&Ucirc;","&#187;","&#249;","&Uuml;","&#64;","&#188;","&#250;","&Yacute;","&#189;","&#251;","&THORN;","&#128;","&#190;","&#252;","&szlig;","&#191;","&#253;","&agrave;","&#130;","&#192;","&#254;","&aacute;","&#131;","&#193;","&#255;","&aring;","&#132;","&#194;","&aelig;","&#133;","&#195;","&ccedil;","&#134;","&#196;","&egrave;","&#135;","&#197;","&eacute;","&#136;","&#198;","&ecirc;","&#137;","&#199;","&euml;","&#138;","&#200;","&igrave;","&#139;","&#201;","&iacute;","&#140;","&#202;","&icirc;","&#203;","&iuml;","&#142;","&#204;","&eth;","&#205;","&ntilde;","&#206;","&ograve;","&#145;","&#207;","&oacute;","&#146;","&#208;","&ocirc;","&#147;","&#209;","&otilde;","&#148;","&#210;","&ouml;","&#149;","&#211;","&oslash;","&#150;","&#212;","&ugrave;","&#151;","&#213;","&uacute;","&#152;","&#214;","&ucirc;","&#153;","&#215;","&yacute;","&#154;","&#216;","&thorn;","&#155;","&#217;","&yuml;","&#156;","&#218;"];

    for (const field of fields) {
        // Skip anything that is not an object carrying a string `value`.
        if (!field || typeof field.value !== "string") {
            continue;
        }
        let text = field.value;
        for (let x = 0; x < chars.length; x++) {
            // split/join replaces every occurrence, not only the first one.
            text = text.split(chars[x]).join(codes[x]);
        }
        field.value = text;
    }
 }

char_convert(this);

1
这很好用。,但是由于某些原因,当与某些JQuery功能结合使用时,它会失效。有时会转换成一些,或仅转换成几对。但总的来说,效果很好。onBlur =“ char_convert(this);”
Neotropic

嗯,我在Chrome中收到错误“未捕获的TypeError:无法调用未定义的方法'replace'”,而在Firebug中收到了“ arguments [i] .value is undefined”。
莫斯,

将所有这些特殊字符放入这样的数组中是完全没有意义的。查看其他答案。
加文

对我来说是最好的解决方案,唯一将í转换为&iacute;的解决方案。例如。
爱德华

您如何从键盘上获取这些字符?我知道这是一个愚蠢的问题,例如在OS X中
PositiveGuy

7

正如dragon所提到的,最干净的方法是使用jQuery:

/**
 * HTML-encodes a string with jQuery: the text is stored in a detached <div>
 * and the element's serialized HTML is returned.
 *
 * @param {string} s - raw text to encode
 * @returns {string} the HTML produced by jQuery for that text
 */
function HtmlEncode(s) {
    var holder = $('<div>');
    holder.text(s);
    return holder.html();
}

/**
 * Decodes HTML entities (and drops markup) by parsing the string and reading
 * back its text content.
 *
 * The markup is parsed inside a separate document created with
 * createHTMLDocument rather than in the live page, so untrusted input such as
 * `<img src=x onerror=...>` is only read as text.
 * NOTE(review): relies on such documents being inert (no script execution or
 * resource loading) - confirm against the browsers you support.
 *
 * @param {string} s - HTML-encoded text
 * @returns {string} the decoded plain text
 */
function HtmlDecode(s) {
    var inertDocument = document.implementation.createHTMLDocument('');
    var holder = inertDocument.createElement('div');
    holder.innerHTML = s;
    return holder.textContent;
}

有趣,但是如果您的字符串包含空格,则不会改变它。更好的方法是使用encodeURI(yourString);
Mike Gledhill 2014年

6
/**
 * Escapes the characters < > & " ' # using a lookup table.
 *
 * @param {string} str - text to escape
 * @returns {string} the escaped text
 */
function ConvChar(str) {
  var c = {'<':'&lt;', '>':'&gt;', '&':'&amp;', '"':'&quot;', "'":'&#039;',
       '#':'&#035;' };
  // Single pass: each matched character is looked up in the table above.
  return str.replace(/[<&>'"#]/g, function(s) { return c[s]; });
}

alert(ConvChar('<-“-&-”->-<-\'-#-\'->'));

结果:

&lt;-&gt;-&amp;-&gt;-&lt;-&#039;-&#035;-&#039;-&gt;

在textarea标签中:

<-“-&-”->-<-'-#-'->

如果您只是在长代码中更改一些字符,...


4

PRE标记以及大多数其他HTML标记中,使用输出重定向字符(<和>)的批处理文件的纯文本将破坏HTML,但这是我的提示TEXTAREA元素中包含所有内容-不会破坏HTML,主要是因为我们位于由OS实例化和处理的控件内,因此HTML引擎未解析其内容。

例如,假设我要使用JavaScript突出显示批处理文件的语法。我只是简单地将代码粘贴到文本区域中,而不必担心HTML保留字符,并让脚本处理innerHTMLtextarea 的属性,该属性的结果是文本被HTML保留字符替换为相应的ISO-8859-1实体。

当您检索元素的innerHTML(和outerHTML)属性时,浏览器将自动转义特殊字符。使用textarea(谁知道,也许是类型的文本输入)只会使您免于进行转换(手动或通过代码)。

我使用此技巧来测试语法荧光笔,当完成创作和测试后,我只是从视图中隐藏了文本区域。



3

解决方法:

// "<div>" creates a new detached element; the selector "div" would instead
// match every <div> already on the page and overwrite its text.
var temp = $("<div>").text("<");
var afterEscape = temp.html(); // afterEscape == "&lt;"

2
// Character codes to look for and, at the same index, the text that replaces
// each of them.
var swapCodes   = [8211, 8212, 8216, 8217, 8220, 8221, 8226, 8230, 8482, 169, 61558, 8226, 61607];
var swapStrings = ["--", "--", "'",  "'",  '"',  '"',  "*",  "...", "&trade;", "&copy;", "&bull;", "&bull;", "&bull;"];

var TextCheck = {
    /**
     * Registers cleanWord as the handler for the listed events on a field.
     *
     * @param {*} div - anything jQuery accepts as a target (selector or element)
     */
    doCWBind: function (div) {
        $(div).bind({
            // NOTE(review): "bind" is kept from the original but is not a
            // standard DOM event name; "blur" may have been intended - confirm.
            bind: function () {
                TextCheck.cleanWord(div);
            },
            focus: function () {
                TextCheck.cleanWord(div);
            },
            paste: function () {
                TextCheck.cleanWord(div);
            }
        });
    },
    /**
     * Rewrites the field's value, replacing every character listed in
     * swapCodes with the matching entry of swapStrings.
     *
     * @param {*} div - anything jQuery accepts as a target (selector or element)
     */
    cleanWord: function (div) {
        var output = $(div).val();
        for (var i = 0; i < swapCodes.length; i++) {
            // A \u escape needs exactly four hex digits. Without padding,
            // 169 became "\ua9", which matches the literal text "ua9".
            var hex = ("0000" + swapCodes[i].toString(16)).slice(-4);
            var swapper = new RegExp("\\u" + hex, "g");
            output = output.replace(swapper, swapStrings[i]);
        }
        $(div).val(output);
    }
};

我们现在使用的另一个可行。上面我有一个调用脚本,并返回转换后的代码。仅适用于小文本区域(表示文章/博客等内容不完整...)


对于以上。适用于大多数字符。

// Extended replacement tables: swapCodes[i] is a character code and
// swapStrings[i] is the text that replaces it.
var swapCodes = [
    8211, 8212, 8216, 8217, 8220, 8221, 8226, 8230, 8482, 61558, 8226, 61607,
    161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
    171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
    181, 182, 183, 184, 185, 186, 187, 188, 189, 190,
    191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
    201, 202, 203, 204, 205, 206, 207, 208, 209, 210,
    211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
    221, 222, 223, 224, 225, 226, 227, 228, 229, 230,
    231, 232, 233, 234, 235, 236, 237, 238, 239, 240,
    241, 242, 243, 244, 245, 246, 247, 248, 249, 250,
    251, 252, 253, 254, 255,
    338, 339, 352, 353, 376, 402
];
var swapStrings = [
    "--", "--", "'", "'", '"', '"', "*", "...", "&trade;", "&bull;", "&bull;", "&bull;",
    "&iexcl;", "&cent;", "&pound;", "&curren;", "&yen;", "&brvbar;", "&sect;", "&uml;", "&copy;", "&ordf;",
    "&laquo;", "&not;", "&shy;", "&reg;", "&macr;", "&deg;", "&plusmn;", "&sup2;", "&sup3;", "&acute;",
    "&micro;", "&para;", "&middot;", "&cedil;", "&sup1;", "&ordm;", "&raquo;", "&frac14;", "&frac12;", "&frac34;",
    "&iquest;", "&Agrave;", "&Aacute;", "&Acirc;", "&Atilde;", "&Auml;", "&Aring;", "&AElig;", "&Ccedil;", "&Egrave;",
    "&Eacute;", "&Ecirc;", "&Euml;", "&Igrave;", "&Iacute;", "&Icirc;", "&Iuml;", "&ETH;", "&Ntilde;", "&Ograve;",
    "&Oacute;", "&Ocirc;", "&Otilde;", "&Ouml;", "&times;", "&Oslash;", "&Ugrave;", "&Uacute;", "&Ucirc;", "&Uuml;",
    "&Yacute;", "&THORN;", "&szlig;", "&agrave;", "&aacute;", "&acirc;", "&atilde;", "&auml;", "&aring;", "&aelig;",
    "&ccedil;", "&egrave;", "&eacute;", "&ecirc;", "&euml;", "&igrave;", "&iacute;", "&icirc;", "&iuml;", "&eth;",
    "&ntilde;", "&ograve;", "&oacute;", "&ocirc;", "&otilde;", "&ouml;", "&divide;", "&oslash;", "&ugrave;", "&uacute;",
    "&ucirc;", "&uuml;", "&yacute;", "&thorn;", "&yuml;",
    "&#338;", "&#339;", "&#352;", "&#353;", "&#376;", "&#402;"
];

我创建了一个JavaScript文件,该文件具有很多功能,包括上述功能。 http://www.neotropicsolutions.com/JSChars.zip

包括所有需要的文件。我添加了jQuery 1.4.4。仅仅是因为我看到了其他版本的问题,但尚未尝试。

Requires: jQuery & jQuery Impromptu from: http://trentrichardson.com/Impromptu/index.php

1. Word Count
2. Character Conversion
3. Checks to ensure this is not passed: "notsomeverylongstringmissingspaces"
4. Checks to make sure ALL IS NOT ALL UPPERCASE.
5. Strip HTML

    // Word Counter
    $.getScript('js/characters.js',function(){
            $('#adtxt').bind("keyup click blur focus change paste",
                function(event){
                    TextCheck.wordCount(30, "#adtxt", "#adtxt_count", event);
            });
            $('#adtxt').blur(
                function(event){
                    TextCheck.check_length('#adtxt'); // unsures properly spaces-not one long word
                    TextCheck.doCWBind('#adtxt');// char conversion
            });

            TextCheck.wordCount(30, "#adtxt", "#adtxt_count", false);
        });

    //HTML
    <textarea name="adtxt" id="adtxt" rows="10" cols="70" class="wordCount"></textarea>
<div id="adtxt_count" class="clear"></div>

    // Just Character Conversions:
    TextCheck.doCWBind('#myfield');

    // Run through form fields in a form for case checking.
    // Alerts user when field is blur'd.
    var labels = new Array("Brief Description","Website URL","Contact Name","Website","Email","Linkback URL");
    var checking = new Array("descr","title","fname","website","email","linkback");
    TextCheck.check_it(checking,labels);

    // Extra security to check again, make sure form is not submitted
    var pass = TextCheck.validate(checking,labels);
    if(pass){
        //do form actions
    }


    //Strip HTML
    <textarea name="adtxt" id="adtxt" rows="10" cols="70" onblur="TextCheck.stripHTML(this);"></textarea>

2
 <!doctype html>
    <html lang="en">
    <head>
      <meta charset="utf-8">
      <title>html</title>  
      <script>
      // Wait until the DOM is parsed so that #test exists. The native event is
      // used because this page does not load jQuery, so `$` is undefined here.
      document.addEventListener('DOMContentLoaded', function () {
        document.getElementById('test').innerHTML = "&amp;";
      });

      </script>
    </head>
    <body>
    <div id="test"></div>
    </body>
    </html>

您可以使用上述代码将特殊字符简单地转换为html。


2

我发现这是一个很好的库,在这种情况下非常有用。

https://github.com/mathiasbynens/he

根据其作者:

它支持HTML中所有标准化的命名字符引用,像浏览器一样处理有歧义的“&”符号和其他边缘情况,具有广泛的测试套件,并且与许多其他JavaScript解决方案不同,he可以很好地处理星体平面(astral)Unicode符号。


这个工具github.com/mathiasbynens/he对于Mathias来说非常好,并且还具有在线游乐场
Mohamed Hussain

1

这是我不需要Jquery使用的几种方法:

您可以编码字符串中的每个字符

/**
 * Encodes every character of a string as a decimal numeric character
 * reference (`&#NNN;`).
 *
 * The `u` flag makes the pattern match whole code points, so a character
 * stored as a surrogate pair yields one reference instead of two halves.
 *
 * @param {string} e - text to encode
 * @returns {string} the fully encoded text
 */
function encode(e) {
  return e.replace(/[\s\S]/gu, function (ch) {
    return "&#" + ch.codePointAt(0) + ";";
  });
}

或者只针对主要需要担心的、与安全相关的字符(&、换行符、<、>、"和')进行编码,例如:

/**
 * Encodes only & newline < > ' and " as decimal numeric character references.
 *
 * @param {string} r - text to encode
 * @returns {string} the text with those characters replaced by `&#NNN;`
 */
function encode(r){
  var riskyChars = /[\x26\x0A\<>'"]/g;
  function toNumericReference(ch) {
    return "&#" + ch.charCodeAt(0) + ";";
  }
  return r.replace(riskyChars, toNumericReference);
}

test.value=encode('How to encode\nonly html tags &<>\'" nice & fast!');

/*************
* \x26 is &ampersand (it has to be first),
* \x0A is newline,
*************/
<textarea id=test rows="9" cols="55">www.WHAK.com</textarea>


0
/**
 * Replaces < > & " and ' with decimal numeric character references.
 *
 * @param {string} text - text to escape
 * @returns {string} the escaped text
 */
function escape (text)
{
  var specials = /[<>\&\"\']/g;
  var toReference = function(ch) {
    return ['&#', ch.charCodeAt(0), ';'].join('');
  };
  return text.replace(specials, toReference);
}

alert(escape("<>&'\""));

0

这并不能直接回答您的问题,但是如果您是innerHTML为了在元素内编写文本而遇到编码问题,则只需使用textContent,即:

var s = "Foo 'bar' baz <qux>";

var element = document.getElementById('foo');
element.textContent = s;

// <div id="foo">Foo 'bar' baz <qux></div>

0

我们可以使用javascript DOMParser进行特殊字符转换。

// Parse the string as HTML and read back the decoded text. The original line
// had an unbalanced "(" and parsed as "application/xml", whose document has
// no `body` property.
const parser = new DOMParser();
const convertedValue = parser.parseFromString("&#039 &amp &#039 &lt &gt", "text/html").body.textContent;

0

以下是在JS中编码xml转义字符的简单函数

Encoder.htmlEncode(unsafeText);


0

您可以通过将函数.text()替换为.html()来修复它。它为我工作。


-1
<html>
<body>
<script type="text/javascript">
var str= "&\"'<>";
alert('B4 Change:\n' + str);
str= str.replace(/\&/g,'&amp;');
str= str.replace(/</g,'&lt;');
str= str.replace(/>/g,'&gt;');
str= str.replace(/\"/g,'&quot;');
str= str.replace(/\'/g,'&#039;');
alert('After change:\n' + str);
</script>
</body>
</html>      

使用它来测试:http : //www.w3schools.com/js/tryit.asp?filename=tryjs_text


-1

是的,但是如果您需要将结果字符串插入某处而不进行转换,则需要执行以下操作:

str.replace(/'/g,"&amp;amp;#39;"); // and so on


-4

使用javaScript函数escape(),它可以对字符串进行编码。

例如,

escape("yourString");

1
用于输入非HTML的URL的编码(并且该功能由于Unicode而被破坏,因此无论如何都已弃用)。
昆汀2012年

-4
/// <summary>
/// HTML-encodes <paramref name="text"/> by writing it through an
/// <c>HtmlTextWriter</c> into a <c>StringWriter</c>.
/// </summary>
/// <param name="text">The raw text to encode.</param>
/// <returns>The encoded text collected by the string writer.</returns>
public static string HtmlEncode (string text)
{
    using (StringWriter sw = new StringWriter())
    using (var x = new HtmlTextWriter(sw))
    {
        x.WriteEncodedText(text);
        // Flush so everything has reached the StringWriter before it is read;
        // the result is taken before either writer is disposed.
        x.Flush();
        return sw.ToString();
    }
}

这个解决方案甚至是JavaScript代码吗?因为对我来说看起来像JAVA或C#。
TK先生

这不是Javascript。
迭戈·福特斯
By using our site, you acknowledge that you have read and understand our Cookie Policy and Privacy Policy.
Licensed under cc by-sa 3.0 with attribution required.