Revision: 8179
Updated Code
at September 17, 2011 06:06 by wizard04
Updated Code
/*****************************************
* String encoding/decoding functions
*
* This work is licensed under a Creative Commons Attribution 3.0 Unported License
* http://creativecommons.org/licenses/by/3.0/
*
* Author: Andy Harrison, http://dragonzreef.com/
* Date: 16 September 2011
*****************************************/
String.prototype.encodeToUrlPart = function(){ return encodeURIComponent(this); };
String.prototype.decodeFromUrlPart = function(){ return decodeURIComponent(this); };
String.prototype.encodeToUrl = function(){ return encodeURI(this); };
String.prototype.decodeFromUrl = function(){ return decodeURI(this); };
//HTML-escapes amphersands that do not designate a character reference
String.prototype.encodeIncongruousAmphersands = function()
{
var str = this.toString();
if(!str) return "";
//escape amphersands that are obviously not designating character references
str = str.replace(/&#x([A-F0-9]*(?:[^A-F0-9;]|$))/ig, "&#x$1");
str = str.replace(/&#(?!x)([0-9]*(?:[^0-9;]|$))/ig, "&#$1");
str = str.replace(/&(?!#)([a-z0-9]*(?:[^a-z0-9;]|$))/ig, "&$1");
//for substrings formatted as character entity references, make sure entity names are valid
//see http://www.w3.org/TR/html401/sgml/entities.html
var validEntityNames = ""+
//markup-significant and internationalization characters
"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
//ISO 8859-1 characters
"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
"uacute|ucirc|uuml|yacute|thorn|yuml|"+
//symbols, mathematical symbols, and Greek letters
"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
"loz|spades|clubs|hearts|diams";
var rxp = new RegExp("&(?!("+validEntityNames+");)([a-zA-Z0-9]+;)", "g");
str.replace(rxp, "&$1");
return str;
};
String.prototype.encodeToHtml = function(keepValidEntities)
{
var str = this.toString();
if(!str) return "";
str = keepValidEntities ? str.encodeIncongruousAmphersands() : str.replace(/&/g, "&");
str = str.replace(/</g, "<").replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'");
return str;
};
//unescapes all entities in the string, not just the markup-significant characters
String.prototype.decodeFromHtml = function()
{
var str = this.toString();
if(!str) return "";
var tmp = document.createElement("div");
tmp.innerHTML = str;
return tmp.firstChild.nodeValue;
};
//escapes the string for use as a JavaScript string in embedded or inline code
//See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
String.prototype.encodeToJavaScriptString = function()
{
var str = this.toString();
if(!str) return "";
str = str.replace(/\\/g, "\\\\");
//prevent "escape from the quote" attacks by escaping quotes and line feed characters
str = str.replace(/'/g, "\\u0027").replace(/"/g, "\\u0022");
str = str.replace(/\u0009/ig, "\\t").replace(/\u000A/ig, "\\n").replace(/\u000D/ig, "\\r").replace(/\u0085/ig, "\\u0085"); //tab, line feed, carriage return, next line
str = str.replace(/\u2028/ig, "\\u2028").replace(/\u2029/ig, "\\u2029"); //line separator, paragraph separator
//prevent string from closing the tag
str = str.replace(/</g, "\\u003C").replace(/>/g, "\\u003E");
//these are escaped just in case ("defense-in-depth")
str = str.replace(/&/g, "\\u0026").replace(/=/g, "\\u003D");
return str;
};
Revision: 8178
Updated Code
at September 12, 2008 16:14 by wizard04
Updated Code
function escapeToHTML(str)
{
str = str.replace(/&/g, "&");
str = str.replace(/</g, "<").replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'");
return str;
}
function unescapeFromHTML(str)
{
str = str.replace(/</g, "<").replace(/>/g, ">").replace(/"/g, "\"").replace(/'/g, "'");
str = str.replace(/&/g, "&");
return str;
}
//escapes the string for use within a script tag or inline event handler (e.g., an onclick attribute)
//See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
function escapeToScript(str)
{
str = str.replace(/\\/g, "\\\\");
//prevent "escape from the quote" attacks by escaping quotes and line feed characters
str = str.replace(/\u0009/ig, "\\t").replace(/\u000A/ig, "\\n").replace(/\u000D/ig, "\\r").replace(/\u0085/ig, "\\u0085");
str = str.replace(/\u2028/ig, "\\u2028").replace(/\u2029/ig, "\\u2029");
str = str.replace(/'/g, "\\u0027").replace(/"/g, "\\u0022");
//prevent string from closing the tag
str = str.replace(/</g, "\\u003C").replace(/>/g, "\\u003E");
//these are escaped just in case ("defense-in-depth")
str = str.replace(/&/g, "\\u0026").replace(/=/g, "\\u003D");
return str;
}
//fixes un-escaped amphersands
function fixAmphersands(txt)
{
if(!txt) return "";
var newText = txt;
do
{
txt = newText;
//order matters here!
newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&#x$1$7");
newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&#$1$2$8");
newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&$1$2$8");
}while(txt != newText)
//make sure entity names are valid
//see http://www.w3.org/TR/html401/sgml/entities.html
var validEntityNames = new RegExp("^("+
//markup-significant and internationalization characters
"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
//ISO 8859-1 characters
"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
"uacute|ucirc|uuml|yacute|thorn|yuml|"+
//symbols, mathematical symbols, and Greek letters
"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
"loz|spades|clubs|hearts|diams)$");
var mtch = txt.match(/&[a-z0-9]+;/ig);
var rxp;
for(var i=0; mtch && i<mtch.length; i++)
{
if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
{
//escape the amphersand
rxp = new RegExp(mtch[i], "g");
txt = txt.replace(rxp, "&"+mtch[i].slice(1));
}
}
return txt;
}
//Sometimes, setting an element attribute (e.g., title) with javascript automatically escapes all amphersands in the
// string, apparently to be "helpful". It also does this when creating a text node with document.createTextNode.
//This function gets around this by setting innerHTML on a temporary element and returning the text node's value.
function literalText(str)
{
var tmp = document.createElement("div");
tmp.innerHTML = str;
return (tmp.firstChild ? tmp.firstChild.nodeValue : "");
}
Revision: 8177
Updated Code
at September 12, 2008 16:11 by wizard04
Updated Code
function escapeToHTML(str)
{
str = str.replace(/&/g, "&");
str = str.replace(/</g, "<").replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'");
return str;
}
function unescapeFromHTML(str)
{
str = str.replace(/</g, "<").replace(/>/g, ">").replace(/"/g, "\"").replace(/'/g, "'");
str = str.replace(/&/g, "&");
return str;
}
//escapes the string for use within a script tag or inline event handler (e.g., an onclick attribute)
//See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
function escapeToScript(str)
{
str = str.replace(/\\/g, "\\\\");
//prevent "escape from the quote" attacks by escaping quotes and line feed characters
str = str.replace(/\u0009/ig, "\\t").replace(/\u000A/ig, "\\n").replace(/\u000D/ig, "\\r").replace(/\u0085/ig, "\\u0085");
str = str.replace(/\u2028/ig, "\\u2028").replace(/\u2029/ig, "\\u2029");
str = str.replace(/'/g, "\\u0027").replace(/"/g, "\\u0022");
//prevent string from closing the tag
str = str.replace(/</g, "\\u003C").replace(/>/g, "\\u003E");
//these are escaped just in case ("defense-in-depth")
str = str.replace(/&/g, "\\u0026").replace(/=/g, "\\u003D");
return str;
}
//fixes un-escaped amphersands
function fixAmphersands(txt)
{
if(!txt) return "";
var newText = txt;
do
{
txt = newText;
//order matters here!
newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&#x$1$7");
newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&#$1$2$8");
newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&$1$2$8");
}while(txt != newText)
//make sure entity names are valid
//see http://www.w3.org/TR/html401/sgml/entities.html
var validEntityNames = new RegExp("^("+
//markup-significant and internationalization characters
"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
//ISO 8859-1 characters
"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
"uacute|ucirc|uuml|yacute|thorn|yuml|"+
//symbols, mathematical symbols, and Greek letters
"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
"loz|spades|clubs|hearts|diams)$");
var mtch = txt.match(/&[a-z0-9]+;/ig);
var rxp;
for(var i=0; mtch && i<mtch.length; i++)
{
if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
{
//escape the amphersand
rxp = new RegExp(mtch[i], "g");
txt = txt.replace(rxp, "&"+mtch[i].slice(1));
}
}
return txt;
}
//Sometimes, setting an element attribute with javascript automatically escapes all amphersands in the string, apparently
// to be "helpful". It also does this when creating a text node with document.createTextNode.
//This function gets around this by setting innerHTML on a temporary element and returning the text node's value.
function literalText(str)
{
var tmp = document.createElement("div");
tmp.innerHTML = str;
return (tmp.firstChild ? tmp.firstChild.nodeValue : "");
}
Revision: 8176
Updated Code
at September 12, 2008 16:11 by wizard04
Updated Code
//See http://code.google.com/p/doctype/wiki/ArticleXSSInAttributes
//and http://code.google.com/p/doctype/wiki/ArticleXSSInBodyText
function escapeToHTML(str)
{
str = str.replace(/&/g, "&");
str = str.replace(/</g, "<").replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'");
return str;
}
function unescapeFromHTML(str)
{
str = str.replace(/</g, "<").replace(/>/g, ">").replace(/"/g, "\"").replace(/'/g, "'");
str = str.replace(/&/g, "&");
return str;
}
//escapes the string for use within a script tag or inline event handler (e.g., an onclick attribute)
//See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
function escapeToScript(str)
{
str = str.replace(/\\/g, "\\\\");
//prevent "escape from the quote" attacks by escaping quotes and line feed characters
str = str.replace(/\u0009/ig, "\\t").replace(/\u000A/ig, "\\n").replace(/\u000D/ig, "\\r").replace(/\u0085/ig, "\\u0085");
str = str.replace(/\u2028/ig, "\\u2028").replace(/\u2029/ig, "\\u2029");
str = str.replace(/'/g, "\\u0027").replace(/"/g, "\\u0022");
//prevent string from closing the tag
str = str.replace(/</g, "\\u003C").replace(/>/g, "\\u003E");
//these are escaped just in case ("defense-in-depth")
str = str.replace(/&/g, "\\u0026").replace(/=/g, "\\u003D");
return str;
}
//fixes un-escaped amphersands
function fixAmphersands(txt)
{
if(!txt) return "";
var newText = txt;
do
{
txt = newText;
//order matters here!
newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&#x$1$7");
newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&#$1$2$8");
newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&$1$2$8");
}while(txt != newText)
//make sure entity names are valid
//see http://www.w3.org/TR/html401/sgml/entities.html
var validEntityNames = new RegExp("^("+
//markup-significant and internationalization characters
"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
//ISO 8859-1 characters
"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
"uacute|ucirc|uuml|yacute|thorn|yuml|"+
//symbols, mathematical symbols, and Greek letters
"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
"loz|spades|clubs|hearts|diams)$");
var mtch = txt.match(/&[a-z0-9]+;/ig);
var rxp;
for(var i=0; mtch && i<mtch.length; i++)
{
if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
{
//escape the amphersand
rxp = new RegExp(mtch[i], "g");
txt = txt.replace(rxp, "&"+mtch[i].slice(1));
}
}
return txt;
}
//Sometimes, setting an element attribute with javascript automatically escapes all amphersands in the string, apparently
// to be "helpful". It also does this when creating a text node with document.createTextNode.
//This function gets around this by setting innerHTML on a temporary element and returning the text node's value.
function literalText(str)
{
var tmp = document.createElement("div");
tmp.innerHTML = str;
return (tmp.firstChild ? tmp.firstChild.nodeValue : "");
}
Revision: 8175
Updated Code
at September 12, 2008 16:09 by wizard04
Updated Code
//See http://code.google.com/p/doctype/wiki/ArticleXSSInAttributes
//and http://code.google.com/p/doctype/wiki/ArticleXSSInBodyText
function escapeToHTML(str)
{
str = str.replace(/&/g, "&");
str = str.replace(/</g, "<").replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'");
return str;
}
function unescapeFromHTML(str)
{
str = str.replace(/</g, "<").replace(/>/g, ">").replace(/"/g, "\"").replace(/'/g, "'");
str = str.replace(/&/g, "&");
return str;
}
//escapes the string for use within a script tag or inline event handler (e.g., an onclick attribute)
//See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
function escapeToScript(str)
{
str = str.replace(/\\/g, "\\\\");
//prevent "escape from the quote" attacks by escaping quotes and line feed characters
str = str.replace(/\u0009/ig, "\\t").replace(/\u000A/ig, "\\n").replace(/\u000D/ig, "\\r").replace(/\u0085/ig, "\\u0085");
str = str.replace(/\u2028/ig, "\\u2028").replace(/\u2029/ig, "\\u2029");
str = str.replace(/'/g, "\\u0027").replace(/"/g, "\\u0022");
//prevent string from closing the tag
str = str.replace(/</g, "\\u003C").replace(/>/g, "\\u003E");
//these are escaped just in case ("defense-in-depth")
str = str.replace(/&/g, "\\u0026").replace(/=/g, "\\u003D");
return str;
}
//remove all tags from txt
function stripHTML(txt)
{
txt = txt.replace(/<\/?(br|p|div|ul|ol|li|blockquote|pre)\s*\/?>/ig, " "); //replace these with a space
txt = txt.replace(/<.*?>/g, "");
txt = txt.replace(/</g, "<");
txt = txt.replace(/>/g, ">");
return txt;
}
//fixes un-escaped amphersands
function fixAmphersands(txt)
{
if(!txt) return "";
var newText = txt;
do
{
txt = newText;
//order matters here!
newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&#x$1$7");
newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&#$1$2$8");
newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&$1$2$8");
}while(txt != newText)
//make sure entity names are valid
//see http://www.w3.org/TR/html401/sgml/entities.html
var validEntityNames = new RegExp("^("+
//markup-significant and internationalization characters
"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
//ISO 8859-1 characters
"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
"uacute|ucirc|uuml|yacute|thorn|yuml|"+
//symbols, mathematical symbols, and Greek letters
"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
"loz|spades|clubs|hearts|diams)$");
var mtch = txt.match(/&[a-z0-9]+;/ig);
var rxp;
for(var i=0; mtch && i<mtch.length; i++)
{
if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
{
//escape the amphersand
rxp = new RegExp(mtch[i], "g");
txt = txt.replace(rxp, "&"+mtch[i].slice(1));
}
}
return txt;
}
//Sometimes, setting an element attribute with javascript automatically escapes all amphersands in the string, apparently
// to be "helpful". It also does this when creating a text node with document.createTextNode.
//This function gets around this by setting innerHTML on a temporary element and returning the text node's value.
function literalText(str)
{
var tmp = document.createElement("div");
tmp.innerHTML = str;
return (tmp.firstChild ? tmp.firstChild.nodeValue : "");
}
Revision: 8174
Updated Code
at September 8, 2008 08:33 by wizard04
Updated Code
function escapeToHTML(str){ return str.replace(/&/g, "&").replace(/>/g, ">").replace(/</g, "<"); }
function unescapeFromHTML(str){ return str.replace(/>/g, ">").replace(/</g, "<").replace(/&/g, "&"); }
function escapeToHTMLAttribute(str){
return str.replace(/&/g, "&").replace(/"/g, """).replace(/>/g, ">").replace(/</g, "<"); }
function unescapeFromHTMLAttribute(str){
return str.replace(/"/g, "\"").replace(/>/g, ">").replace(/</g, "<").replace(/&/g, "&"); }
//remove all tags from txt
function stripHTML(txt)
{
txt = txt.replace(/<\/?(br|p|div|ul|ol|li|blockquote|pre)\s*\/?>/ig, " "); //replace these with a space
txt = txt.replace(/<.*?>/g, "");
txt = txt.replace(/</g, "<");
txt = txt.replace(/>/g, ">");
return txt;
}
//remove scripting from txt; this is nowhere near thorough, obviously
function sanitizeHTML(txt)
{
//get rid of scripting
txt = txt.replace(/<script.*?<\/script>?/ig, "");
txt = txt.replace(/<a\s[^>]*href=(["'])\s*javascript:.*?\1.*?>(.*?)($|<\/a>)/ig, "$2");
txt = txt.replace(/<iframe.*?<\/iframe>?/ig, "");
//...what else?
return txt;
}
//fixes un-escaped amphersands
function fixAmphersands(txt)
{
if(!txt) return "";
var newText = txt;
do
{
txt = newText;
//order matters here!
newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&#x$1$7");
newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&#$1$2$8");
newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&$1$2$8");
}while(txt != newText)
//make sure entity names are valid
//see http://www.w3.org/TR/html401/sgml/entities.html
var validEntityNames = new RegExp("^("+
//markup-significant and internationalization characters
"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
//ISO 8859-1 characters
"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
"uacute|ucirc|uuml|yacute|thorn|yuml|"+
//symbols, mathematical symbols, and Greek letters
"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
"loz|spades|clubs|hearts|diams)$");
var mtch = txt.match(/&[a-z0-9]+;/ig);
var rxp;
for(var i=0; mtch && i<mtch.length; i++)
{
if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
{
//escape the amphersand
rxp = new RegExp(mtch[i], "g");
txt = txt.replace(rxp, "&"+mtch[i].slice(1));
}
}
return txt;
}
function unFixAmphersands(txt)
{
if(!txt) return "";
return txt.replace(/&/ig, "&");
}
Revision: 8173
Updated Code
at September 7, 2008 01:00 by wizard04
Updated Code
function escapeToHTML(str){ return str.replace(/&/g, "&").replace(/>/g, ">").replace(/</g, "<"); }
function unescapeFromHTML(str){ return str.replace(/>/g, ">").replace(/</g, "<").replace(/&/g, "&"); }
function escapeToHTMLAttribute(str){
return str.replace(/&/g, "&").replace(/"/g, """).replace(/>/g, ">").replace(/</g, "<"); }
function unescapeFromHTMLAttribute(str){
return str.replace(/"/g, "\"").replace(/>/g, ">").replace(/</g, "<").replace(/&/g, "&"); }
//remove all tags from txt
function stripHTML(txt)
{
txt = txt.replace(/<\/?(br|p|div|ul|ol|li|blockquote|pre)\s*\/?>/ig, " "); //replace these with a space
txt = txt.replace(/<.*?>/g, "");
txt = txt.replace(/</g, "<");
txt = txt.replace(/>/g, ">");
return txt;
}
//remove scripting from txt; this is nowhere near thorough, obviously
function sanitizeHTML(txt)
{
//get rid of scripting
txt = txt.replace(/<script.*?<\/script>?/ig, "");
txt = txt.replace(/<a\s[^>]*href=(["'])\s*javascript:.*?\1.*?>(.*?)($|<\/a>)/ig, "$2");
txt = txt.replace(/<iframe.*?<\/iframe>?/ig, "");
//...what else?
return txt;
}
//fixes un-escaped amphersands
function fixAmphersands(txt)
{
if(!txt) return "";
var newText = txt;
do
{
txt = newText;
//order matters here!
newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&#x$1$7");
newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&#$1$2$8");
newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&$1$2$8");
}while(txt != newText)
//make sure entity names are valid
//see http://www.w3.org/TR/html401/sgml/entities.html
var validEntityNames = new RegExp("^("+
//markup-significant and internationalization characters
"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
//ISO 8859-1 characters
"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
"uacute|ucirc|uuml|yacute|thorn|yuml|"+
//symbols, mathematical symbols, and Greek letters
"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
"loz|spades|clubs|hearts|diams)$");
var mtch = txt.match(/&[a-z0-9]+;/ig);
var rxp;
for(var i=0; mtch && i<mtch.length; i++)
{
if(!validEntityNames.test(mtch[i].slice(1,mtch[i].length-1)))
{
//escape the amphersand
rxp = new RegExp(mtch[i], "g");
txt = txt.replace(rxp, "&"+mtch[i].slice(1));
}
}
return txt;
}
function unFixAmphersands(txt)
{
if(!txt) return "";
return txt.replace(/&/ig, "&");
}
Revision: 8172
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at September 5, 2008 13:43 by wizard04
Initial Code
function escapeToHTML(str){ return str.replace(/&/g, "&").replace(/>/g, ">").replace(/</g, "<"); }
function unescapeFromHTML(str){ return str.replace(/>/g, ">").replace(/</g, "<").replace(/&/g, "&"); }
function escapeToHTMLAttribute(str){
return str.replace(/&/g, "&").replace(/"/g, """).replace(/>/g, ">").replace(/</g, "<"); }
function unescapeFromHTMLAttribute(str){
return str.replace(/"/g, "\"").replace(/>/g, ">").replace(/</g, "<").replace(/&/g, "&"); }
//remove all tags from txt
function stripHTML(txt)
{
txt = txt.replace(/<\/?(br|p|div|ul|ol|li|blockquote|pre)\s*\/?>/ig, " "); //replace these with a space
txt = txt.replace(/<.*?>/g, "");
txt = txt.replace(/</g, "<");
txt = txt.replace(/>/g, ">");
return txt;
}
//remove scripting from txt; this is nowhere near thorough, obviously
function sanitizeHTML(txt)
{
//get rid of scripting
txt = txt.replace(/<script.*?<\/script>?/ig, "");
txt = txt.replace(/<a\s[^>]*href=(["'])\s*javascript:.*?\1.*?>(.*?)($|<\/a>)/ig, "$2");
txt = txt.replace(/<iframe.*?<\/iframe>?/ig, "");
//...what else?
return txt;
}
//fixes un-escaped amphersands
function fixAmphersands(txt)
{
if(!txt) return "";
var newText = txt;
do
{
txt = newText;
//order matters here!
newText = newText.replace(/&#x(((([a-f0-9]|[^a-f0-9;])*)[^a-f0-9;](([a-f0-9]|[^a-f0-9;])*))*)(;|$)/ig, "&#x$1$7");
newText = newText.replace(/&#([0-9]|[^x0-9;]|$)(((([0-9]|[^0-9;])*)[^0-9;](([0-9]|[^0-9;])*))*)(;|$)/ig, "&#$1$2$8");
newText = newText.replace(/&([a-z0-9]|[^a-z0-9#;]|$)(((([a-z0-9]|[^a-z0-9;])*)[^a-z0-9;](([a-z0-9]|[^a-z0-9;])*))*)(;|$)/ig, "&$1$2$8");
}while(txt != newText)
//make sure entity names are valid
//see http://www.w3.org/TR/html401/sgml/entities.html
var mtch = txt.match(/&[a-z0-9]+;/ig);
var rxp;
for(var i=0; mtch && i<mtch.length; i++)
{
switch(mtch[i].slice(1,mtch[i].length-1))
{
//markup-significant and internationalization characters
case "quot": case "amp": case "lt": case "gt": case "OElig": case "oelig": case "Scaron": case "scaron":
case "Yuml": case "circ": case "tilde": case "ensp": case "emsp": case "thinsp": case "zwnj": case "zwj":
case "lrm": case "rlm": case "ndash": case "mdash": case "lsquo": case "rsquo": case "sbquo": case "ldquo":
case "rdquo": case "bdquo": case "dagger": case "Dagger": case "permil": case "lsaquo": case "rsaquo":
case "euro":
break;
//ISO 8859-1 characters
case "nbsp": case "iexcl": case "cent": case "pound": case "curren": case "yen": case "brvbar": case "sect":
case "uml": case "copy": case "ordf": case "laquo": case "not": case "shy": case "reg": case "macr":
case "deg": case "plusmn": case "sup2": case "sup3": case "acute": case "micro": case "para": case "middot":
case "cedil": case "sup1": case "ordm": case "raquo": case "frac14": case "frac12": case "frac34":
case "iquest": case "Agrave": case "Aacute": case "Acirc": case "Atilde": case "Auml": case "Aring":
case "AElig": case "Ccedil": case "Egrave": case "Eacute": case "Ecirc": case "Euml": case "Igrave":
case "Iacute": case "Icirc": case "Iuml": case "ETH": case "Ntilde": case "Ograve": case "Oacute":
case "Ocirc": case "Otilde": case "Ouml": case "times": case "Oslash": case "Ugrave": case "Uacute":
case "Ucirc": case "Uuml": case "Yacute": case "THORN": case "szlig": case "agrave": case "aacute":
case "acirc": case "atilde": case "auml": case "aring": case "aelig": case "ccedil": case "egrave":
case "eacute": case "ecirc": case "euml": case "igrave": case "iacute": case "icirc": case "iuml": case "eth":
case "ntilde": case "ograve": case "oacute": case "ocirc": case "otilde": case "ouml": case "divide":
case "oslash": case "ugrave": case "uacute": case "ucirc": case "uuml": case "yacute": case "thorn":
case "yuml":
break;
//symbols, mathematical symbols, and Greek letters
case "fnof": case "Alpha": case "Beta": case "Gamma": case "Delta": case "Epsilon": case "Zeta": case "Eta":
case "Theta": case "Iota": case "Kappa": case "Lambda": case "Mu": case "Nu": case "Xi": case "Omicron":
case "Pi": case "Rho": case "Sigma": case "Tau": case "Upsilon": case "Phi": case "Chi": case "Psi":
case "Omega": case "alpha": case "beta": case "gamma": case "delta": case "epsilon": case "zeta": case "eta":
case "theta": case "iota": case "kappa": case "lambda": case "mu": case "nu": case "xi": case "omicron":
case "pi": case "rho": case "sigmaf": case "sigma": case "tau": case "upsilon": case "phi": case "chi":
case "psi": case "omega": case "thetasym": case "upsih": case "piv": case "bull": case "hellip": case "prime":
case "Prime": case "oline": case "frasl": case "weierp": case "image": case "real": case "trade":
case "alefsym": case "larr": case "uarr": case "rarr": case "darr": case "harr": case "crarr": case "lArr":
case "uArr": case "rArr": case "dArr": case "hArr": case "forall": case "part": case "exist": case "empty":
case "nabla": case "isin": case "notin": case "ni": case "prod": case "sum": case "minus": case "lowast":
case "radic": case "prop": case "infin": case "ang": case "and": case "or": case "cap": case "cup": case "int":
case "there4": case "sim": case "cong": case "asymp": case "ne": case "equiv": case "le": case "ge":
case "sub": case "sup": case "nsub": case "sube": case "supe": case "oplus": case "otimes": case "perp":
case "sdot": case "lceil": case "rceil": case "lfloor": case "rfloor": case "lang": case "rang": case "loz":
case "spades": case "clubs": case "hearts": case "diams":
break;
default:
//escape the amphersand
rxp = new RegExp(mtch[i], "g");
txt = txt.replace(rxp, "&"+mtch[i].slice(1));
}
}
return txt;
}
function unFixAmphersands(txt)
{
if(!txt) return "";
return txt.replace(/&/ig, "&");
}
Initial URL
Initial Description
Initial Title
URL and HTML Encoding
Initial Tags
url, javascript, html
Initial Language
JavaScript