Posted By

sambou on 11/06/10

Tagged

Viewed 0 times

Returns a word count in a text block, while ignoring words within square brackets


/ Published in: C#

Gives an accurate word count that matches the one reported by Microsoft Word. As an additional feature, it ignores all words within square brackets [ ], which were used for comments. Remove the bracket-handling block if this feature is not required.

  /// <summary>
  /// Counts the words in a block of text, ignoring every word that lies inside
  /// square brackets (bracketed text is treated as a comment).
  /// </summary>
  /// <remarks>
  /// Anything that looks like a markup tag (an opening angle bracket, at least one
  /// character, and a closing angle bracket) is removed before counting. A word is
  /// any maximal run of non-whitespace characters, so spaces, tabs, carriage
  /// returns and line feeds all act as separators and repeated separators never
  /// produce empty words. Brackets are not tracked as nested: a token containing
  /// a closing bracket always ends the ignored region.
  /// </remarks>
  /// <param name="Passage">The text to count; may be null or empty.</param>
  /// <returns>
  /// The number of words outside square brackets; 0 when <paramref name="Passage"/>
  /// is null, empty, or contains only whitespace. The result is never negative.
  /// </returns>
  private int WordCount(string Passage)
  {
      if (string.IsNullOrEmpty(Passage))
      {
          return 0;
      }

      // Strip tags BEFORE tokenising: removing a tag that sits between two
      // spaces must not leave behind an empty "word".
      string withoutTags = Regex.Replace(Passage, @"<(.|\n)+?>", "");

      int wordCount = 0;
      bool insideBrackets = false;

      // \S+ yields each run of non-whitespace characters, so every kind of
      // whitespace separates words and empty tokens cannot occur.
      foreach (Match token in Regex.Matches(withoutTags, @"\S+"))
      {
          string word = token.Value;
          bool opens = word.Contains("[");
          bool closes = word.Contains("]");

          if (opens || closes)
          {
              // A token touching a bracket is part of a comment and is skipped
              // (not subtracted). We remain inside the comment only when the
              // token opens a bracket without closing one.
              insideBrackets = opens && !closes;
          }
          else if (!insideBrackets)
          {
              wordCount++;
          }
      }

      return wordCount;
  }

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.