Regular Expression Key Clean


/ Published in: Python
Save to your folder(s)



Copy this code and paste it in your HTML
  1. """
  2. ****************************************************
  3. *Name : regexkeyclean.py
  4. *Author : Jason Spadaro
  5. *Date : Mon Jun 8 17:39:16 EDT 2009
  6. *Description : Demonstrates using regular expressions to take data from a
  7. * list, and incorporate it into a dictionary.
  8. *
  9. * Copyright 2009
  10. ****************************************************
  11. """
  12.  
  13. import re
  14.  
  15. class myRegexer:
  16. """
  17. ****************************************************
  18. *All this does is describe an aggregator of regular expression objects
  19. ****************************************************
  20. """
  21.  
  22. def __init__(self, exps):
  23. """
  24. ****************************************************
  25. *Takes a dictionary of strings representing regular expressions,
  26. *uses that dictionary to create a dictionary of regular expression
  27. *objects, and places them in in the instance data.
  28. ****************************************************
  29. """
  30.  
  31. self.exps = exps
  32. tempDct = {}
  33. for k, v in self.exps.iteritems():
  34. tempDct.update({k: re.compile(v)})
  35. self.exps = tempDct
  36.  
  37. class dataItem:
  38. """
  39. ****************************************************
  40. *An individual data object
  41. ****************************************************
  42. """
  43. def __init__(self, dct):
  44. """
  45. ****************************************************
  46. *Sets up the dictionary instance data
  47. ****************************************************
  48. """
  49.  
  50. self.dct = {"name":"", "id":"", "data":""}
  51. self.dct.update(dct)
  52.  
  53. def __str__(self):
  54. """
  55. ****************************************************
  56. *Convient formating if you need to "see" what this is
  57. ****************************************************
  58. """
  59. myStr = ""
  60. for k, v in self.dct:
  61. myStr = myStr + k + "-->" + v + ","
  62. myStr = myStr[0,-1]
  63. return myStr
  64.  
  65. class dataDictionary:
  66. """
  67. ****************************************************
  68. *Aggregates dataItems, with a method to change the list to a dictionary
  69. *formatteed according to regular expressions.
  70. ****************************************************
  71. """
  72.  
  73. def __init__(self, dataList, regexExps):
  74. """
  75. ****************************************************
  76. *Sets up the data list, data dictionary, and the regular
  77. *expressions to be used.
  78. ****************************************************
  79. """
  80.  
  81. self.dataItems = {}
  82. self.dataList = dataList
  83. self.regexExps = regexExps
  84. self.myRegexer = myRegexer(self.regexExps)
  85.  
  86. def listToItems(self):
  87. """
  88. ****************************************************
  89. *Converts the list of items to a dictionary of items. It keys
  90. *the item based on a matches to the "name" regular expression.
  91. *The value associated with that key is a dictionary where the
  92. *all the data is keyed similarly using regular expressions.
  93. ****************************************************
  94. """
  95.  
  96. self.dataItems = {}
  97. for i in self.dataList:
  98. tempDct = {}
  99. name = ""
  100. for k, v in i.iteritems():
  101. """
  102. ****************************************************
  103. *Designates the regular expression to associate
  104. *with each key.
  105. ****************************************************
  106. """
  107.  
  108. if self.myRegexer.exps["name"].match(k):
  109. tempDct.update({"name":v})
  110. name = tempDct["name"]
  111.  
  112. if self.myRegexer.exps["id"].match(k):
  113. tempDct.update({"id":v})
  114.  
  115. if self.myRegexer.exps["data"].match(k):
  116. tempDct.update({"data":v})
  117.  
  118. self.dataItems.update({name:tempDct})
  119.  
  120. def __str__(self):
  121. """
  122. ****************************************************
  123. *User readable version of the data
  124. ****************************************************
  125. """
  126.  
  127. myStr = ""
  128. for k, v in self.dataItems.iteritems():
  129. myStr = myStr + "\n\n____" + k + "____\n"
  130. myStr = myStr + "\t" + v.__str__()
  131. return myStr
  132.  
  133. ##########################################################################
  134.  
  135. if __name__ == "__main__":
  136.  
  137. #First we need our data...
  138. my_data = [{"dataName":"foo",
  139. "idNum":1,
  140. "data":1},
  141.  
  142. {"dataname":"bar",
  143. "id_num":2,
  144. "data":"10"}]
  145.  
  146. #Now, how are we going to designate keys? Like this:
  147. my_regular_expressions = {"name":"(.*)(N|n)(A|a)(M|m)(E|e)(.*)",
  148. "id":"(.*)(I|i)(D|d)(.*)",
  149. "data":"data"}
  150.  
  151. #Instantiate our data dictionary
  152. sample_data_dct = dataDictionary(my_data, my_regular_expressions)
  153.  
  154. #Here's the list we've fed in
  155. print sample_data_dct.dataList
  156. print
  157.  
  158. #Here's our dictionary. Notice that it's empty.
  159. print sample_data_dct.dataItems
  160. print
  161. print "#####################################"
  162. print
  163.  
  164. #Here's the magic. Converting the list to our dictionary of "Items"
  165. sample_data_dct.listToItems()
  166.  
  167. #The final output. Notice that (a) the name of each object has been
  168. #successfully extracted, and (b) each object's keys have been normalized
  169. #for the appropriate values.
  170. print sample_data_dct
  171.  
  172.  
  173. ######################OUTPUT#######################
  174. """
  175.  
  176. [{'dataName': 'foo', 'idNum': 1, 'data': 1}, {'dataname': 'bar', 'id_num': 2, 'data': '10'}]
  177.  
  178. {}
  179.  
  180. #####################################
  181.  
  182.  
  183.  
  184. ____foo____
  185. {'data': 1, 'name': 'foo', 'id': 1}
  186.  
  187. ____bar____
  188. {'data': '10', 'name': 'bar', 'id': 2}
  189.  
  190. """

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.