Python script to extract all email addresses from bulk text


/ Published in: Python
Save to your folder(s)

testing tool: http://www.pythonregex.com/


Copy this code and paste it in your HTML
  1. # this script will open a file with email addresses in it, then extract
  2. # those addresses and write them to a new file
  3.  
  4. import os
  5. import re
  6.  
  7. # vars for filenames
  8. filename = 'emaillist.txt'
  9. newfilename = 'emaillist-rev.txt'
  10.  
  11. # read file
  12. if os.path.exists(filename):
  13. data = open(filename,'r')
  14. bulkemails = data.read()
  15. else:
  16. print "File not found."
  17. raise SystemExit
  18.  
  19. r = re.compile(r'(\b[\w.]+@+[\w.]+.+[\w.]\b)')
  20. results = r.findall(bulkemails)
  21.  
  22. emails = ""
  23. for x in results:
  24. emails += str(x)+"\n"
  25.  
  26. # function to write file
  27. def writefile():
  28. f = open(newfilename, 'w')
  29. f.write(emails)
  30. f.close()
  31. print "File written."
  32.  
  33. # function to handle overwrite question
  34. def overwrite_ok():
  35. response = raw_input("Are you sure you want to overwrite "+str(newfilename)+"? Yes or No\n")
  36. if response == "Yes":
  37. writefile()
  38. elif response == "No":
  39. print "Aborted."
  40. else:
  41. print "Please enter Yes or No."
  42. overwrite_ok()
  43.  
  44. # write/overwrite
  45. if os.path.exists(newfilename):
  46. overwrite_ok()
  47. else:
  48. writefile()

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.