/ Published in: C#
Counts words so that the result matches the word count reported by Microsoft Word. As an additional feature, it ignores all words within square brackets [ ], which were used for comments. Remove the bracket-handling block if this feature is not required.
Expand |
Embed | Plain Text
Copy this code and paste it in your HTML
/// <summary>
/// Counts the whitespace-separated words in a passage of text, skipping
/// angle-bracket tags and any words enclosed in square brackets.
/// </summary>
/// <param name="Passage">The text to count words in. May be null or empty.</param>
/// <returns>
/// The number of counted words; 0 when <paramref name="Passage"/> is null,
/// empty, or consists only of whitespace. The result is never negative.
/// </returns>
private int WordCount(string Passage)
{
    if (string.IsNullOrWhiteSpace(Passage))
    {
        return 0;
    }

    // Remove anything that begins with '<', ends with '>' and has at least one
    // character in between. Tags are removed before splitting so the gaps they
    // leave behind cannot turn into empty "words".
    string withoutTags = Regex.Replace(Passage, @"<(.|\n)+?>", "");

    // Split on every common whitespace character (space, horizontal tab, line
    // feed, vertical tab, form feed, carriage return) and drop empty entries,
    // so runs of mixed whitespace never yield zero-length tokens.
    char[] separators = { ' ', '\t', '\n', '\v', '\f', '\r' };
    string[] words = withoutTags.Split(separators, System.StringSplitOptions.RemoveEmptyEntries);

    int wordCount = 0;
    bool insideBrackets = false;

    // Ignore all words between square brackets [ ].
    foreach (string word in words)
    {
        int lastOpen = word.LastIndexOf('[');
        int lastClose = word.LastIndexOf(']');

        if (lastOpen < 0 && lastClose < 0)
        {
            // Plain word: count it only when we are outside a bracketed span.
            if (!insideBrackets)
            {
                wordCount++;
            }

            continue;
        }

        // A word that touches a bracket is never counted. Whichever bracket
        // appears last in the word decides the state for the following words:
        // "[note]" and "words]" close the span, "[start" opens it.
        insideBrackets = lastOpen > lastClose;
    }

    return wordCount;
}