/ Published in: PHP
Expand |
Embed | Plain Text
Copy this code and paste it in your HTML
<?php

/**
 * Convert an HTML fragment to plain text.
 *
 * Strips javascript, styles, comments and HTML tags, normalizes entities
 * and collapses runs of whitespace to single spaces.
 * Based on http://www.php.net/manual/en/function.strip-tags.php#68757
 *
 * SECURITY: the result is NOT safe to echo into HTML. We must still run
 * htmlentities()/htmlspecialchars() on anything that comes out! For instance
 *     <<a>script>alert('XSS')//<<a>/script>
 * will become
 *     <script>alert('XSS')//</script>
 *
 * @param string $html UTF-8 encoded HTML.
 * @return string Plain text; an empty string when PCRE cannot process the
 *                input (typically because it is not valid UTF-8).
 */
function html2text($html)
{
    static $search = array(
        '@<script.+?</script>@usi', // Strip out javascript content
        '@<style.+?</style>@usi',   // Strip style content
        '@<!--.+?-->@us',           // Strip multi-line comments including CDATA
        '@</?[a-z].*?\>@usi',       // Strip out HTML tags
    );

    // Replace with a space (not '') so that "a<br>b" does not become "ab";
    // the surplus whitespace is collapsed further down.
    $text = preg_replace($search, ' ', (string) $html);
    if ($text === null) {
        // preg_replace() returns null on failure, e.g. malformed UTF-8
        // under the /u modifier.
        return '';
    }

    // normalize common entities
    $text = normalizeEntities($text);

    // decode other entities
    $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

    // normalize possibly repeated newlines, tabs, spaces to spaces
    $collapsed = preg_replace('/\s+/u', ' ', $text);
    if ($collapsed !== null) {
        $text = $collapsed;
    }

    return trim($text);
}

/**
 * Replace encoded and double encoded entities (e.g. "&rdquo;" and
 * "&amp;rdquo;") with an equivalent plain character.
 * Also see /app/bookmarkletPopup.js
 *
 * Each map row is array(replacement, code, code, ...), where a code is:
 *   - an int            => decimal numeric entity  (&#8217;)
 *   - a single char     => that literal character
 *   - "x" + hex digits  => hex numeric entity      (&#x2019;)
 *   - anything else     => named entity            (&rsquo;)
 *
 * @param string $text UTF-8 text possibly containing entities.
 * @return string Text with the mapped entities replaced; the input is
 *                returned unchanged if PCRE fails (e.g. invalid UTF-8).
 */
function normalizeEntities($text)
{
    static $find = array();
    static $repl = array();

    if (!$find) {
        // build $find and $repl from map one time
        // NOTE(review): the original map contents were not available; this
        // table is a reconstruction covering common typographic punctuation.
        // Confirm against the intended entity list before relying on it.
        $map = array(
            array("'", 'apos', 39, 'x27'),                       // Apostrophe
            array("'", "\xE2\x80\x98", 'lsquo', 8216, 'x2018'),  // Open single quote
            array("'", "\xE2\x80\x99", 'rsquo', 8217, 'x2019'),  // Close single quote
            array('"', "\xE2\x80\x9C", 'ldquo', 8220, 'x201C'),  // Open double quote
            array('"', "\xE2\x80\x9D", 'rdquo', 8221, 'x201D'),  // Close double quote
            array('-', "\xE2\x80\x93", 'ndash', 8211, 'x2013'),  // En dash
            array('-', "\xE2\x80\x94", 'mdash', 8212, 'x2014'),  // Em dash
            array('...', "\xE2\x80\xA6", 'hellip', 8230, 'x2026'), // Ellipsis
        );

        foreach ($map as $e) {
            $n = count($e);
            // index 0 is the replacement; every following element is a code
            for ($i = 1; $i < $n; ++$i) {
                $code = $e[$i];
                if (is_int($code)) {
                    // numeric entity (optional leading zeros, optionally double encoded)
                    $regex = "/&(amp;)?#0*$code;/";
                } elseif (preg_match('/^.$/us', $code)) {
                    // single character; quoted so metacharacters stay literal
                    $regex = '/' . preg_quote($code, '/') . '/u';
                } elseif (preg_match('/^x[0-9A-F]+$/i', $code)) {
                    // hex entity
                    $regex = '/&(amp;)?#x0*' . substr($code, 1) . ';/i';
                } else {
                    // named entity
                    $regex = '/&(amp;)?' . preg_quote($code, '/') . ';/';
                }
                $find[] = $regex;
                $repl[] = $e[0];
            }
        }
    } // end first time build

    $result = preg_replace($find, $repl, (string) $text);

    // Best effort: keep the caller's text if PCRE could not process it.
    return $result === null ? $text : $result;
}