Parse phrases from string


/ Published in: PHP
Save to your folder(s)



Copy this code and paste it in your HTML
  1. <?php
  2. /**
  3.  * Parses a String of Tags
  4.  *
  5.  * Tags are space delimited. Either single or double quotes mark a phrase.
  6.  * Odd quotes will cause everything on their right to reflect as one single
  7.  * tag or phrase. All white-space within a phrase is converted to single
  8.  * space characters. Quotes burried within tags are ignored! Duplicate tags
  9.  * are ignored, even duplicate phrases that are equivalent.
  10.  *
  11.  * Returns an array of tags.
  12.  */
  13. function ParseTagString($sTagString)
  14. {
  15. $arTags = array(); // Array of Output
  16. $cPhraseQuote = null; // Record of the quote that opened the current phrase
  17. $sPhrase = null; // Temp storage for the current phrase we are building
  18.  
  19. // Define some constants
  20. static $sTokens = "
  21. \t"; // Space, Return, Newline, Tab
  22. static $sQuotes = "'\""; // Single and Double Quotes
  23.  
  24. // Start the State Machine
  25. do
  26. {
  27. // Get the next token, which may be the first
  28. $sToken = isset($sToken)? strtok($sTokens) : strtok($sTagString, $sTokens);
  29.  
  30. // Are there more tokens?
  31. if ($sToken === false)
  32. {
  33. // Ensure that the last phrase is marked as ended
  34. $cPhraseQuote = null;
  35. }
  36. else
  37. {
  38. // Are we within a phrase or not?
  39. if ($cPhraseQuote !== null)
  40. {
  41. // Will the current token end the phrase?
  42. if (substr($sToken, -1, 1) === $cPhraseQuote)
  43. {
  44. // Trim the last character and add to the current phrase, with a single leading space if necessary
  45. if (strlen($sToken) > 1) $sPhrase .= ((strlen($sPhrase) > 0)? ' ' : null) . substr($sToken, 0, -1);
  46. $cPhraseQuote = null;
  47. }
  48. else
  49. {
  50. // If not, add the token to the phrase, with a single leading space if necessary
  51. $sPhrase .= ((strlen($sPhrase) > 0)? ' ' : null) . $sToken;
  52. }
  53. }
  54. else
  55. {
  56. // Will the current token start a phrase?
  57. if (strpos($sQuotes, $sToken[0]) !== false)
  58. {
  59. // Will the current token end the phrase?
  60. if ((strlen($sToken) > 1) && ($sToken[0] === substr($sToken, -1, 1)))
  61. {
  62. // The current token begins AND ends the phrase, trim the quotes
  63. $sPhrase = substr($sToken, 1, -1);
  64. }
  65. else
  66. {
  67. // Remove the leading quote
  68. $sPhrase = substr($sToken, 1);
  69. $cPhraseQuote = $sToken[0];
  70. }
  71. }
  72. else
  73. $sPhrase = $sToken;
  74. }
  75. }
  76.  
  77. // If, at this point, we are not within a phrase, the prepared phrase is complete and can be added to the array
  78. if (($cPhraseQuote === null) && ($sPhrase != null))
  79. {
  80. $sPhrase = strtolower($sPhrase);
  81. if (!in_array($sPhrase, $arTags)) $arTags[] = $sPhrase;
  82. $sPhrase = null;
  83. }
  84. }
  85. while ($sToken !== false); // Stop when we receive FALSE from strtok()
  86. return $arTags;
  87. }
  88.  

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.