Revision: 20081
Updated Code
at November 9, 2009 05:19 by tillkruess
Updated Code
/**
 * Cleans up whitespace in an (X)HTML document and re-indents it with tabs.
 *
 * Line endings are normalised to "\n", leading/trailing whitespace and empty
 * lines are removed, line breaks inside running text are collapsed to a single
 * space, and every line is indented by one tab per open tag.
 *
 * Elements listed in $keep_tags are whitespace-sensitive: each one is swapped
 * for a placeholder before clean-up and restored afterwards, so its content is
 * returned exactly as given (apart from line-ending normalisation) and does not
 * influence the indentation of the surrounding markup.
 *
 * @param string            $string    The markup to clean.
 * @param array|string|null $keep_tags Tag name(s) whose elements must be left
 *                                     untouched. Defaults to script, pre and
 *                                     textarea when empty. Matching is
 *                                     case-sensitive and does not handle nested
 *                                     elements of the same name.
 * @return string The cleaned and indented markup.
 */
function clean_xhtml($string, $keep_tags = null) {
    $string = (string) $string;

    // Build the pattern for protected elements. Previously the pattern was only
    // defined when $keep_tags was empty, so passing a value broke the function.
    if (!$keep_tags) {
        $keep_tags = array('script', 'pre', 'textarea');
    }
    $alternatives = array();
    foreach ((array) $keep_tags as $tag) {
        $tag = trim((string) $tag);
        if ($tag === '') {
            continue; // an empty name would match arbitrary tags
        }
        $tag = preg_quote($tag, '~');
        $alternatives[] = '<'.$tag.'[^>]*>.*?<\/'.$tag.'>';
    }
    $keep_regexp = $alternatives ? '~'.implode('|', $alternatives).'~s' : null;

    // replace \r\n and lone \r with \n
    $string = str_replace(array("\r\n", "\r"), "\n", $string);
    // remove whitespace from the beginning
    $string = preg_replace('~^\s+~s', '', $string);
    // remove whitespace from the end
    $string = preg_replace('~\s+$~s', '', $string);

    // Swap every protected element for a unique self-closing placeholder.
    // A self-closing tag is neutral for the indentation counters below and is
    // never touched by the whitespace rules, so the element can be restored by
    // identity instead of by (possibly ambiguous) text matching.
    $restore = array();
    if ($keep_regexp !== null && preg_match_all($keep_regexp, $string, $kept)) {
        // pick a placeholder prefix that does not occur in the document
        $salt = 0;
        while (strpos($string, '<xhtmlkeep'.$salt.'-') !== false) {
            $salt++;
        }
        // preg_split() returns the text between the matches: one more piece
        // than there are matches, in document order
        $pieces = preg_split($keep_regexp, $string);
        $string = '';
        foreach ($pieces as $i => $piece) {
            $string .= $piece;
            if (isset($kept[0][$i])) {
                $placeholder = '<xhtmlkeep'.$salt.'-'.$i.'/>';
                $restore[$placeholder] = $kept[0][$i];
                $string .= $placeholder;
            }
        }
    }

    // remove whitespace from the beginning of each line
    $string = preg_replace('~^\s+~m', '', $string);
    // remove whitespace from the end of each line
    $string = preg_replace('~\s+$~m', '', $string);
    // remove empty lines
    $string = preg_replace('~\n\s*\n~ms', "\n", $string);
    // remove line breaks (and runs of whitespace) inside normal text
    $string = preg_replace('~([^>\s])(\s\s+|\n)([^<\s])~m', '$1 $3', $string);

    // correct indentation
    $indent = 0;
    $lines = explode("\n", $string);
    foreach ($lines as $index => $line) {
        // a line that starts with a closing tag is outdented by one level
        $correction = (strncmp($line, '</', 2) === 0) ? 1 : 0;
        // clamp at zero: unbalanced closing tags must not yield a negative count
        $lines[$index] = str_repeat("\t", max(0, $indent - $correction)).$line;
        $indent += substr_count($line, '<');      // indent every tag
        $indent -= substr_count($line, '<!');     // subtract doctype declarations and comments
        $indent -= substr_count($line, '<?');     // subtract processing instructions
        $indent -= substr_count($line, '/>');     // subtract self-closing tags
        $indent -= substr_count($line, '</') * 2; // subtract closing tags (and their opening tags)
    }
    $string = implode("\n", $lines);

    // restore the protected elements; strtr() replaces in a single pass, so
    // restored content is never scanned for further placeholders
    if ($restore) {
        $string = strtr($string, $restore);
    }
    return $string;
}
Revision: 20080
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at November 6, 2009 09:54 by tillkruess
Initial Code
/**
 * Cleans up whitespace in an (X)HTML document and re-indents it with tabs.
 *
 * Line endings are normalised to "\n", leading/trailing whitespace and empty
 * lines are removed, line breaks inside running text are collapsed to a single
 * space, and every line is indented by one tab per open tag.
 *
 * Elements listed in $keep_tags are whitespace-sensitive: each one is swapped
 * for a placeholder before clean-up and restored afterwards, so its content is
 * returned exactly as given (apart from line-ending normalisation) and does not
 * influence the indentation of the surrounding markup.
 *
 * @param string            $string    The markup to clean.
 * @param array|string|null $keep_tags Tag name(s) whose elements must be left
 *                                     untouched. Defaults to script, pre and
 *                                     textarea when empty. Matching is
 *                                     case-sensitive and does not handle nested
 *                                     elements of the same name.
 * @return string The cleaned and indented markup.
 */
function clean_xhtml($string, $keep_tags = null) {
    $string = (string) $string;

    // Build the pattern for protected elements. Previously the pattern was only
    // defined when $keep_tags was empty, so passing a value broke the function.
    if (!$keep_tags) {
        $keep_tags = array('script', 'pre', 'textarea');
    }
    $alternatives = array();
    foreach ((array) $keep_tags as $tag) {
        $tag = trim((string) $tag);
        if ($tag === '') {
            continue; // an empty name would match arbitrary tags
        }
        $tag = preg_quote($tag, '~');
        $alternatives[] = '<'.$tag.'[^>]*>.*?<\/'.$tag.'>';
    }
    $keep_regexp = $alternatives ? '~'.implode('|', $alternatives).'~s' : null;

    // replace \r\n and lone \r with \n
    $string = str_replace(array("\r\n", "\r"), "\n", $string);
    // remove whitespace from the beginning
    $string = preg_replace('~^\s+~s', '', $string);
    // remove whitespace from the end
    $string = preg_replace('~\s+$~s', '', $string);

    // Swap every protected element for a unique self-closing placeholder.
    // A self-closing tag is neutral for the indentation counters below and is
    // never touched by the whitespace rules, so the element can be restored by
    // identity instead of by (possibly ambiguous) text matching.
    $restore = array();
    if ($keep_regexp !== null && preg_match_all($keep_regexp, $string, $kept)) {
        // pick a placeholder prefix that does not occur in the document
        $salt = 0;
        while (strpos($string, '<xhtmlkeep'.$salt.'-') !== false) {
            $salt++;
        }
        // preg_split() returns the text between the matches: one more piece
        // than there are matches, in document order
        $pieces = preg_split($keep_regexp, $string);
        $string = '';
        foreach ($pieces as $i => $piece) {
            $string .= $piece;
            if (isset($kept[0][$i])) {
                $placeholder = '<xhtmlkeep'.$salt.'-'.$i.'/>';
                $restore[$placeholder] = $kept[0][$i];
                $string .= $placeholder;
            }
        }
    }

    // remove whitespace from the beginning of each line
    $string = preg_replace('~^\s+~m', '', $string);
    // remove whitespace from the end of each line
    $string = preg_replace('~\s+$~m', '', $string);
    // remove empty lines
    $string = preg_replace('~\n\s*\n~ms', "\n", $string);
    // remove line breaks (and runs of whitespace) inside normal text
    $string = preg_replace('~([^>\s])(\s\s+|\n)([^<\s])~m', '$1 $3', $string);

    // correct indentation
    $indent = 0;
    $lines = explode("\n", $string);
    foreach ($lines as $index => $line) {
        // a line that starts with a closing tag is outdented by one level
        $correction = (strncmp($line, '</', 2) === 0) ? 1 : 0;
        // clamp at zero: unbalanced closing tags must not yield a negative count
        $lines[$index] = str_repeat("\t", max(0, $indent - $correction)).$line;
        $indent += substr_count($line, '<');      // indent every tag
        $indent -= substr_count($line, '<!');     // subtract doctype declarations and comments
        $indent -= substr_count($line, '<?');     // subtract processing instructions
        $indent -= substr_count($line, '/>');     // subtract self-closing tags
        $indent -= substr_count($line, '</') * 2; // subtract closing tags (and their opening tags)
    }
    $string = implode("\n", $lines);

    // restore the protected elements; strtr() replaces in a single pass, so
    // restored content is never scanned for further placeholders
    if ($restore) {
        $string = strtr($string, $restore);
    }
    return $string;
}
Initial URL
http://pralinenschachtel.de/
Initial Description
Have you ever wanted to clean up and perfectly indent your XHTML document?
Initial Title
Clean / Indent XHTML Document
Initial Tags
html, xhtml
Initial Language
PHP