Revision: 20081
Updated Code
at November 9, 2009 05:19 by tillkruess
Updated Code
/**
 * Cleans up and re-indents an (X)HTML document.
 *
 * Line endings are normalised to "\n", surrounding whitespace and empty
 * lines are removed, line breaks inside running text are collapsed to a
 * single space, and every line is indented with tabs according to the
 * number of tags opened and closed on the preceding lines. The content of
 * the "keep" elements is put back exactly as it was after normalising the
 * line endings and trimming the document.
 *
 * @param string            $string    The markup to clean.
 * @param array|string|null $keep_tags Tag names whose content must stay untouched,
 *                                     as an array or a comma/whitespace separated
 *                                     string. When given, the list replaces the
 *                                     defaults (script, pre, textarea).
 *
 * @return string The cleaned and indented markup.
 */
function clean_xhtml($string, $keep_tags = null) {
    // Resolve the list of protected tags; fall back to the defaults when
    // nothing usable was supplied.
    $tag_names = array('script', 'pre', 'textarea');
    if ($keep_tags) {
        $custom = is_array($keep_tags)
            ? $keep_tags
            : preg_split('~[\s,]+~', (string) $keep_tags, -1, PREG_SPLIT_NO_EMPTY);
        $custom = array_filter(array_map('trim', $custom), 'strlen');
        if ($custom) {
            $tag_names = $custom;
        }
    }

    // One alternative per protected element: opening tag, lazy content, closing tag.
    $patterns = array();
    foreach ($tag_names as $tag) {
        $tag = preg_quote($tag, '~');
        $patterns[] = '<' . $tag . '\b[^>]*>.*?</' . $tag . '>';
    }
    $keep_regexp = '~' . implode('|', $patterns) . '~s';

    // Normalise Windows (\r\n) and old Mac (\r) line endings to \n.
    $string = str_replace(array("\r\n", "\r"), "\n", $string);

    // Remove whitespace from the beginning and the end of the document.
    $string = preg_replace('~^\s+~s', '', $string);
    $string = preg_replace('~\s+$~s', '', $string);

    // Remember the protected elements before any whitespace is touched.
    preg_match_all($keep_regexp, $string, $original_tags);

    // Remove whitespace from the beginning and the end of each line.
    $string = preg_replace('~^\s+~m', '', $string);
    $string = preg_replace('~\s+$~m', '', $string);

    // Remove empty lines.
    $string = preg_replace('~\n\s*\n~ms', "\n", $string);

    // Collapse line breaks and whitespace runs inside normal text (not next to a tag).
    $string = preg_replace('~([^>\s])(\s\s+|\n)([^<\s])~m', '$1 $3', $string);

    // Indent each line by the running tag depth.
    $indent = 0;
    $lines = explode("\n", $string);
    foreach ($lines as $index => $line) {
        // A line starting with a closing tag belongs one level further out.
        $correction = (strncmp($line, '</', 2) === 0) ? 1 : 0;

        // Clamp at zero: unbalanced markup may drive the depth negative and
        // str_repeat() rejects negative counts.
        $lines[$index] = str_repeat("\t", max(0, $indent - $correction)) . $line;

        $indent += substr_count($line, '<');      // every tag opens a level ...
        $indent -= substr_count($line, '<!');     // ... except doctype declarations / comments
        $indent -= substr_count($line, '<?');     // ... and processing instructions
        $indent -= substr_count($line, '/>');     // ... and self-closing tags
        $indent -= substr_count($line, '</') * 2; // a closing tag undoes itself and its opener
    }
    $string = implode("\n", $lines);

    // Put the protected elements back. Work by match offset so that every
    // occurrence receives its own original, even when two protected blocks
    // look identical after the clean-up.
    preg_match_all($keep_regexp, $string, $current_tags, PREG_OFFSET_CAPTURE);
    $result = '';
    $cursor = 0;
    foreach ($current_tags[0] as $key => $match) {
        $text = $match[0];
        $offset = $match[1];
        $result .= substr($string, $cursor, $offset - $cursor);
        $result .= isset($original_tags[0][$key]) ? $original_tags[0][$key] : $text;
        $cursor = $offset + strlen($text);
    }
    $result .= substr($string, $cursor);

    return $result;
}
Revision: 20080
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at November 6, 2009 09:54 by tillkruess
Initial Code
/**
 * Cleans up and re-indents an (X)HTML document.
 *
 * Line endings are normalised to "\n", surrounding whitespace and empty
 * lines are removed, line breaks inside running text are collapsed to a
 * single space, and every line is indented with tabs according to the
 * number of tags opened and closed on the preceding lines. The content of
 * the "keep" elements is put back exactly as it was after normalising the
 * line endings and trimming the document.
 *
 * @param string            $string    The markup to clean.
 * @param array|string|null $keep_tags Tag names whose content must stay untouched,
 *                                     as an array or a comma/whitespace separated
 *                                     string. When given, the list replaces the
 *                                     defaults (script, pre, textarea).
 *
 * @return string The cleaned and indented markup.
 */
function clean_xhtml($string, $keep_tags = null) {
    // Resolve the list of protected tags; fall back to the defaults when
    // nothing usable was supplied.
    $tag_names = array('script', 'pre', 'textarea');
    if ($keep_tags) {
        $custom = is_array($keep_tags)
            ? $keep_tags
            : preg_split('~[\s,]+~', (string) $keep_tags, -1, PREG_SPLIT_NO_EMPTY);
        $custom = array_filter(array_map('trim', $custom), 'strlen');
        if ($custom) {
            $tag_names = $custom;
        }
    }

    // One alternative per protected element: opening tag, lazy content, closing tag.
    $patterns = array();
    foreach ($tag_names as $tag) {
        $tag = preg_quote($tag, '~');
        $patterns[] = '<' . $tag . '\b[^>]*>.*?</' . $tag . '>';
    }
    $keep_regexp = '~' . implode('|', $patterns) . '~s';

    // Normalise Windows (\r\n) and old Mac (\r) line endings to \n.
    $string = str_replace(array("\r\n", "\r"), "\n", $string);

    // Remove whitespace from the beginning and the end of the document.
    $string = preg_replace('~^\s+~s', '', $string);
    $string = preg_replace('~\s+$~s', '', $string);

    // Remember the protected elements before any whitespace is touched.
    preg_match_all($keep_regexp, $string, $original_tags);

    // Remove whitespace from the beginning and the end of each line.
    $string = preg_replace('~^\s+~m', '', $string);
    $string = preg_replace('~\s+$~m', '', $string);

    // Remove empty lines.
    $string = preg_replace('~\n\s*\n~ms', "\n", $string);

    // Collapse line breaks and whitespace runs inside normal text (not next to a tag).
    $string = preg_replace('~([^>\s])(\s\s+|\n)([^<\s])~m', '$1 $3', $string);

    // Indent each line by the running tag depth.
    $indent = 0;
    $lines = explode("\n", $string);
    foreach ($lines as $index => $line) {
        // A line starting with a closing tag belongs one level further out.
        $correction = (strncmp($line, '</', 2) === 0) ? 1 : 0;

        // Clamp at zero: unbalanced markup may drive the depth negative and
        // str_repeat() rejects negative counts.
        $lines[$index] = str_repeat("\t", max(0, $indent - $correction)) . $line;

        $indent += substr_count($line, '<');      // every tag opens a level ...
        $indent -= substr_count($line, '<!');     // ... except doctype declarations / comments
        $indent -= substr_count($line, '<?');     // ... and processing instructions
        $indent -= substr_count($line, '/>');     // ... and self-closing tags
        $indent -= substr_count($line, '</') * 2; // a closing tag undoes itself and its opener
    }
    $string = implode("\n", $lines);

    // Put the protected elements back. Work by match offset so that every
    // occurrence receives its own original, even when two protected blocks
    // look identical after the clean-up.
    preg_match_all($keep_regexp, $string, $current_tags, PREG_OFFSET_CAPTURE);
    $result = '';
    $cursor = 0;
    foreach ($current_tags[0] as $key => $match) {
        $text = $match[0];
        $offset = $match[1];
        $result .= substr($string, $cursor, $offset - $cursor);
        $result .= isset($original_tags[0][$key]) ? $original_tags[0][$key] : $text;
        $cursor = $offset + strlen($text);
    }
    $result .= substr($string, $cursor);

    return $result;
}
Initial URL
http://pralinenschachtel.de/
Initial Description
Have you ever wanted to clean and perfectly indent your XHTML document?
Initial Title
Clean / Indent XHTML Document
Initial Tags
html, xhtml
Initial Language
PHP