Return to Snippet

Revision: 20081
at November 9, 2009 05:19 by tillkruess


Updated Code
/**
 * Cleans up and re-indents an XHTML document.
 *
 * Line endings are normalised to "\n", surrounding and per-line whitespace
 * is stripped, empty lines are dropped, line breaks inside running text are
 * collapsed to single spaces, and every line is indented with tabs according
 * to a simple count of the tags opened and closed on the preceding lines.
 * Elements listed in $keep_tags are put back exactly as they appeared
 * after line-ending normalisation.
 *
 * @param string $string    The XHTML markup to clean.
 * @param mixed  $keep_tags Optional tag name, or array of tag names, whose
 *                          elements must be preserved verbatim. Any falsy value
 *                          selects the defaults: script, pre and textarea.
 *                          NOTE(review): the original never read this value when
 *                          it was set; treating it as a list of tag names is an
 *                          assumption based on the parameter name.
 * @return string The cleaned and indented markup.
 */
function clean_xhtml($string, $keep_tags = null) {

	// build the pattern matching every element that has to stay untouched
	$keep_names = $keep_tags ? (array) $keep_tags : array('script', 'pre', 'textarea');
	$alternatives = array();
	foreach ($keep_names as $name) {
		$name = preg_quote($name, '~');
		$alternatives[] = '<'.$name.'[^>]*>.*?<\/'.$name.'>';
	}
	$keep_regexp = '~'.implode('|', $alternatives).'~s';

	// replace \r\n and then any remaining \r with \n (the order matters)
	$string = str_replace(array("\r\n", "\r"), "\n", $string);

	// remove whitespace from the beginning
	$string = preg_replace('~^\s+~s', '', $string);

	// remove whitespace from the end
	$string = preg_replace('~\s+$~s', '', $string);

	// store all tags which should remain the same
	preg_match_all($keep_regexp, $string, $original_tags);

	// remove whitespace from the beginning of each line
	$string = preg_replace('~^\s+~m', '', $string);

	// remove whitespace from the end of each line
	$string = preg_replace('~\s+$~m', '', $string);

	// remove empty lines
	$string = preg_replace('~\n\s*\n~ms', "\n", $string);

	// remove line breaks and whitespace runs inside normal text (not directly after '>' or before '<')
	$string = preg_replace('~([^>\s])(\s\s+|\n)([^<\s])~m', '$1 $3', $string);

	// correct indentation
	$indent = 0;
	$lines = explode("\n", $string);
	foreach ($lines as $index => $line) {
		// a line starting with a closing tag sits one level further out
		$correction = intval(substr($line, 0, 2) == '</');

		// unbalanced markup can push the depth below zero; str_repeat() rejects negative counts
		$lines[$index] = str_repeat("\t", max(0, $indent - $correction)).$line;

		$indent += substr_count($line, '<'); // indent every tag
		$indent -= substr_count($line, '<!'); // subtract doctype declaration
		$indent -= substr_count($line, '<?'); // subtract processing instructions
		$indent -= substr_count($line, '/>'); // subtract self closing tags
		$indent -= substr_count($line, '</') * 2; // subtract closing tags
	}
	$string = implode("\n", $lines);

	// fetch all tags which could have been changed, together with their byte offsets
	preg_match_all($keep_regexp, $string, $current_tags, PREG_OFFSET_CAPTURE);

	// restore the stored tags position by position, so that two blocks which look
	// identical after cleaning still get their own original content back
	$restored = '';
	$cursor = 0;
	foreach ($current_tags[0] as $key => $match) {
		list($text, $offset) = $match;
		$restored .= substr($string, $cursor, $offset - $cursor);
		// keep the cleaned text if no original was recorded for this position
		$restored .= isset($original_tags[0][$key]) ? $original_tags[0][$key] : $text;
		$cursor = $offset + strlen($text);
	}
	$restored .= substr($string, $cursor);

	return $restored;

}

Revision: 20080
at November 6, 2009 09:54 by tillkruess


Initial Code
/**
 * Cleans up and re-indents an XHTML document.
 *
 * Line endings are normalised to "\n", surrounding and per-line whitespace
 * is stripped, empty lines are dropped, line breaks inside running text are
 * collapsed to single spaces, and every line is indented with tabs according
 * to a simple count of the tags opened and closed on the preceding lines.
 * Elements listed in $keep_tags are put back exactly as they appeared
 * after line-ending normalisation.
 *
 * @param string $string    The XHTML markup to clean.
 * @param mixed  $keep_tags Optional tag name, or array of tag names, whose
 *                          elements must be preserved verbatim. Any falsy value
 *                          selects the defaults: script, pre and textarea.
 *                          NOTE(review): the original never read this value when
 *                          it was set; treating it as a list of tag names is an
 *                          assumption based on the parameter name.
 * @return string The cleaned and indented markup.
 */
function clean_xhtml($string, $keep_tags = null) {

	// build the pattern matching every element that has to stay untouched
	$keep_names = $keep_tags ? (array) $keep_tags : array('script', 'pre', 'textarea');
	$alternatives = array();
	foreach ($keep_names as $name) {
		$name = preg_quote($name, '~');
		$alternatives[] = '<'.$name.'[^>]*>.*?<\/'.$name.'>';
	}
	$keep_regexp = '~'.implode('|', $alternatives).'~s';

	// replace \r\n and then any remaining \r with \n (the order matters)
	$string = str_replace(array("\r\n", "\r"), "\n", $string);

	// remove whitespace from the beginning
	$string = preg_replace('~^\s+~s', '', $string);

	// remove whitespace from the end
	$string = preg_replace('~\s+$~s', '', $string);

	// store all tags which should remain the same
	preg_match_all($keep_regexp, $string, $original_tags);

	// remove whitespace from the beginning of each line
	$string = preg_replace('~^\s+~m', '', $string);

	// remove whitespace from the end of each line
	$string = preg_replace('~\s+$~m', '', $string);

	// remove empty lines
	$string = preg_replace('~\n\s*\n~ms', "\n", $string);

	// remove line breaks and whitespace runs inside normal text (not directly after '>' or before '<')
	$string = preg_replace('~([^>\s])(\s\s+|\n)([^<\s])~m', '$1 $3', $string);

	// correct indentation
	$indent = 0;
	$lines = explode("\n", $string);
	foreach ($lines as $index => $line) {
		// a line starting with a closing tag sits one level further out
		$correction = intval(substr($line, 0, 2) == '</');

		// unbalanced markup can push the depth below zero; str_repeat() rejects negative counts
		$lines[$index] = str_repeat("\t", max(0, $indent - $correction)).$line;

		$indent += substr_count($line, '<'); // indent every tag
		$indent -= substr_count($line, '<!'); // subtract doctype declaration
		$indent -= substr_count($line, '<?'); // subtract processing instructions
		$indent -= substr_count($line, '/>'); // subtract self closing tags
		$indent -= substr_count($line, '</') * 2; // subtract closing tags
	}
	$string = implode("\n", $lines);

	// fetch all tags which could have been changed, together with their byte offsets
	preg_match_all($keep_regexp, $string, $current_tags, PREG_OFFSET_CAPTURE);

	// restore the stored tags position by position, so that two blocks which look
	// identical after cleaning still get their own original content back
	$restored = '';
	$cursor = 0;
	foreach ($current_tags[0] as $key => $match) {
		list($text, $offset) = $match;
		$restored .= substr($string, $cursor, $offset - $cursor);
		// keep the cleaned text if no original was recorded for this position
		$restored .= isset($original_tags[0][$key]) ? $original_tags[0][$key] : $text;
		$cursor = $offset + strlen($text);
	}
	$restored .= substr($string, $cursor);

	return $restored;

}

Initial URL
http://pralinenschachtel.de/

Initial Description
Have you ever wanted to clean up and perfectly indent your XHTML document?

Initial Title
Clean / Indent XHTML Document

Initial Tags
html, xhtml

Initial Language
PHP