Return to Snippet

Revision: 35390
at November 6, 2010 11:47 by sambou


Initial Code
/// <summary>
/// Counts the words in a block of text, ignoring HTML-style tags and every
/// word that lies inside square brackets [ ] (brackets are treated as comments).
/// </summary>
/// <param name="Passage">
/// The text to count. May contain HTML-style tags and [bracketed comments].
/// A null reference is treated like an empty string.
/// </param>
/// <returns>
/// The number of whitespace-separated words outside square brackets;
/// 0 when <paramref name="Passage"/> is null, empty or whitespace only.
/// </returns>
private int WordCount(string Passage)
        {
            // Nothing to count: null is handled here instead of throwing.
            if (Passage == null || Passage.Trim().Length == 0)
            {
                return 0;
            }

            // Remove anything shaped like a tag: '<', at least one character
            // (newlines included), then the nearest '>'. This is done BEFORE
            // whitespace is normalised so that the gaps left behind by removed
            // tags are collapsed as well.
            string text = Regex.Replace(Passage, @"<(.|\n)+?>", "");

            // Turn every run of whitespace (spaces, tabs, carriage returns,
            // line feeds, vertical tabs, non-breaking spaces, ...) into a single
            // space, so that one space is the only word separator left.
            text = Regex.Replace(text, @"\s+", " ").Trim();

            int wordCount = 0;
            bool insideBrackets = false;

            foreach (string word in text.Split(' '))
            {
                // Splitting an empty string yields one empty token; it is not a word.
                if (word.Length == 0)
                {
                    continue;
                }

                int lastOpen = word.LastIndexOf('[');
                int lastClose = word.LastIndexOf(']');

                if (lastOpen >= 0 || lastClose >= 0)
                {
                    // A token that touches a bracket is never counted. Whichever
                    // bracket comes last in the token decides whether the words
                    // that follow are still inside a bracketed section.
                    insideBrackets = lastOpen > lastClose;
                }
                else if (!insideBrackets)
                {
                    wordCount++;
                }
            }

            return wordCount;
        }

Initial URL

                                

Initial Description
Returns an accurate word count intended to match the count reported by Microsoft Word. As an additional feature, it ignores all words within square brackets [ ], which were used for comments. Remove the bracket-handling block if this feature is not required.

Initial Title
Returns a word count in a text block, while ignoring words within square brackets

Initial Tags

                                

Initial Language
C#