Batch Download Sequence data from NCBI using EUtils


Published in: Python
Save to your folder(s)

Based on http://www.ncbi.nlm.nih.gov/books/NBK25498/#chapter3.Application_3_Retrieving_large


Copy this code and paste it into your Python script
  1. #!/usr/bin/python
  2. # Author: Dr. Kumaran Kandasamy
  3. # E-Mail: itskkumaran@gmail.com
  4.  
  5. import urllib, urllib2, re
  6.  
  7. def main(giList, database, rettype):
  8. output = "NO_DATA"
  9. base = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
  10. url = base+"epost.fcgi"
  11. values = {'db' : database,
  12. 'id' : giList,
  13. }
  14.  
  15. data = urllib.urlencode(values)
  16. req = urllib2.Request(url, data)
  17. response = urllib2.urlopen(req)
  18. queryKey = ""; webEnv = "";
  19. for line in response.readlines():
  20. line = line.strip()
  21. if re.search("<WebEnv>(.*)</WebEnv>", line):
  22. webEnv = re.search("<WebEnv>(.*)</WebEnv>", line).groups()[0]
  23. if re.search("<QueryKey>(.*)</QueryKey>", line):
  24. queryKey = re.search("<QueryKey>(.*)</QueryKey>", line).groups()[0]
  25. if queryKey != "" and webEnv != "":
  26. print queryKey, webEnv
  27. url = base+"efetch.fcgi";
  28. values = {
  29. 'db':database,
  30. 'query_key':queryKey,
  31. 'WebEnv':webEnv,
  32. 'rettype':rettype,
  33. 'retmode':'text'
  34. }
  35. #post the efetch URL
  36. data = urllib.urlencode(values)
  37. req = urllib2.Request(url, data)
  38. response = urllib2.urlopen(req)
  39. output = response.readlines()
  40. return output
  41.  
  42. if __name__ == "__main__":
  43. gi = "24475906,224465210,50978625,9507198"
  44. main(gi, 'nucleotide', 'fasta')

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.