Return to Snippet

Revision: 14650
at June 8, 2009 17:46 by cthulhupunk0


Initial Code
"""
****************************************************
*Name : regexkeyclean.py 
*Author : Jason Spadaro
*Date : Mon Jun  8 17:39:16 EDT 2009 
*Description : Demonstrates using regular expressions to take data from a
*              list, and incorporate it into a dictionary.
*
*       Copyright 2009
****************************************************
"""

import re

class myRegexer:
        """
        ****************************************************
        *Aggregates compiled regular expression objects, keyed by name.
        *
        *Instance data:
        *  exps -- dictionary mapping each key of the dictionary given to
        *          the constructor to the compiled form of its pattern.
        ****************************************************
        """

        def __init__(self, exps):
                """
                ****************************************************
                *Takes a dictionary whose values are strings representing
                *regular expressions, compiles every value with re.compile,
                *and places the resulting dictionary (same keys, compiled
                *pattern objects as values) in the instance data as
                *self.exps.
                *
                *The dictionary passed in is left unmodified.  Any error
                *raised by re.compile for a bad pattern propagates to the
                *caller.
                ****************************************************
                """

                # dict.items() is available on both Python 2 and Python 3;
                # dict.iteritems() only exists on Python 2.
                self.exps = dict((key, re.compile(pattern))
                                 for key, pattern in exps.items())

class dataItem:
        """
        ****************************************************
        *An individual data object.
        *
        *Instance data:
        *  dct -- dictionary that always contains the keys "name", "id"
        *         and "data" (each defaulting to the empty string), plus
        *         whatever the constructor argument supplied.
        ****************************************************
        """
        def __init__(self, dct):
                """
                ****************************************************
                *Sets up the dictionary instance data: starts from empty
                *"name", "id" and "data" entries and overlays the entries
                *of dct on top of them.
                ****************************************************
                """

                self.dct = {"name":"", "id":"", "data":""}
                self.dct.update(dct)

        def __str__(self):
                """
                ****************************************************
                *Convenient formatting if you need to "see" what this is.
                *Returns the entries as "key-->value" pairs separated by
                *commas, with no trailing comma.
                ****************************************************
                """
                # Iterate over (key, value) pairs rather than bare keys, and
                # convert both sides with str() so non-string values (for
                # example integer ids) can be displayed as well.
                return ",".join(str(key) + "-->" + str(value)
                                for key, value in self.dct.items())

class dataDictionary:
        """
        ****************************************************
        *Aggregates data items, with a method to change the list to a
        *dictionary formatted according to regular expressions.
        *
        *Instance data:
        *  dataList  -- the list of dictionaries handed to the constructor
        *  regexExps -- the dictionary of pattern strings handed to the
        *               constructor
        *  myRegexer -- myRegexer instance built from regexExps
        *  dataItems -- dictionary filled in by listToItems()
        ****************************************************
        """

        def __init__(self, dataList, regexExps):
                """
                ****************************************************
                *Sets up the data list, the (initially empty) data
                *dictionary, and the regular expressions to be used.
                *listToItems() looks up the "name", "id" and "data"
                *entries of regexExps.
                ****************************************************
                """

                self.dataItems = {}
                self.dataList = dataList
                self.regexExps = regexExps
                self.myRegexer = myRegexer(self.regexExps)

        def listToItems(self):
                """
                ****************************************************
                *Converts the list of items to a dictionary of items.
                *
                *Every key of every dictionary in dataList is tested against
                *the "name", "id" and "data" regular expressions (using
                *match, i.e. anchored at the start of the key).  For each
                *expression that matches, the value is stored under that
                *normalized key in a new dictionary.  The new dictionary is
                *then stored in dataItems under the value that matched
                *"name", or under "" when no key matched "name".
                *
                *dataItems is rebuilt from scratch on every call.
                ****************************************************
                """

                exps = self.myRegexer.exps
                self.dataItems = {}
                for record in self.dataList:
                        normalized = {}
                        # dict.items() works on both Python 2 and Python 3;
                        # dict.iteritems() only exists on Python 2.
                        for key, value in record.items():
                                # Designates the regular expression to
                                # associate with each key.  A key may match
                                # more than one expression.
                                for field in ("name", "id", "data"):
                                        if exps[field].match(key):
                                                normalized[field] = value

                        # Records without a recognizable name share the "" key.
                        name = normalized.get("name", "")
                        self.dataItems[name] = normalized

        def __str__(self):
                """
                ****************************************************
                *User readable version of the data: one "____name____"
                *heading per item followed by a tab-indented rendering of
                *the item's dictionary.  Returns "" when dataItems is empty.
                ****************************************************
                """

                # str() on the name keeps non-string names (for example an
                # integer taken from the input data) from breaking the
                # concatenation.
                return "".join("\n\n____" + str(name) + "____\n" + "\t" + str(item)
                               for name, item in self.dataItems.items())

##########################################################################

if __name__ == "__main__":

        # Demonstration: normalize two records whose keys are spelled
        # inconsistently.  print(...) with a single argument behaves the
        # same on Python 2 and Python 3.

        #First we need our data...
        my_data = [{"dataName":"foo",
                   "idNum":1,
                   "data":1},

                  {"dataname":"bar",
                   "id_num":2,
                   "data":"10"}]

        #Now, how are we going to designate keys?  Like this:
        #(The "data" pattern is anchored with "$" because the expressions
        #are applied with match(): an unanchored "data" would also accept
        #the key "dataName", and the normalized "data" value would then
        #depend on dictionary iteration order.)
        my_regular_expressions = {"name":"(.*)(N|n)(A|a)(M|m)(E|e)(.*)",
                                 "id":"(.*)(I|i)(D|d)(.*)",
                                 "data":"data$"}

        #Instantiate our data dictionary
        sample_data_dct = dataDictionary(my_data, my_regular_expressions)

        #Here's the list we've fed in
        print(sample_data_dct.dataList)
        print("")

        #Here's our dictionary.  Notice that it's empty.
        print(sample_data_dct.dataItems)
        print("")
        print("#####################################")
        print("")

        #Here's the magic.  Converting the list  to our dictionary of "Items"
        sample_data_dct.listToItems()

        #The final output.  Notice that (a) the name of each object has been
        #successfully extracted, and (b) each object's keys have been normalized
        #for the appropriate values.
        print(sample_data_dct)


        ######################OUTPUT#######################
        #(Sample run; the order of keys inside each dictionary and the
        #order of the items may differ between Python versions.)
        """
                
        [{'dataName': 'foo', 'idNum': 1, 'data': 1}, {'dataname': 'bar', 'id_num': 2, 'data': '10'}]

        {}

        #####################################



        ____foo____
        	{'data': 1, 'name': 'foo', 'id': 1}

        ____bar____
        	{'data': '10', 'name': 'bar', 'id': 2}

        """

Initial URL


Initial Description


Initial Title
Regular Expression Key Clean

Initial Tags


Initial Language
Python