Revision: 18904
Updated Code
at October 13, 2009 00:35 by bingjian
Updated Code
#!/usr/local/bin/ruby
require 'net/http'
require 'uri'
# Cleans a title fragment cut out of a Google Scholar result page.
#
# Removes any closing anchor tag and non-breaking spaces, and turns an
# ellipsis into three plain dots. Ordinary spaces between words are kept.
#
# s - the raw title String (may still contain "</a>" or HTML entities).
#
# Returns a new String; the argument is not modified.
def escape_title(s)
  # Raw HTML carries these as entities; the patterns are pure ASCII so they
  # can be applied to a string in any ASCII-compatible encoding.
  cleaned = s.gsub(/<\/a>|&nbsp;/i, '').gsub(/&hellip;/i, '...')

  # Already-decoded characters are non-ASCII. Matching them against a string
  # in another encoding that holds non-ASCII bytes raises, so only handle
  # them when the input is valid UTF-8.
  if cleaned.encoding == Encoding::UTF_8 && cleaned.valid_encoding?
    cleaned = cleaned.delete("\u00A0").gsub("\u2026", '...')
  end

  cleaned
end
# Fetches one page of the Google Scholar "cited by" listing and returns the
# titles found on it.
#
# id      - the Scholar citation id placed in the "cites" query parameter.
# start   - zero-based offset of the first result to request (default 0).
# max_num - maximum number of titles to read from the page (default 10).
#
# Returns an Array of title Strings, each passed through escape_title. The
# array is shorter than max_num (possibly empty) when the page does not
# contain the expected marker or holds fewer results.
def get_citation_list(id, start = 0, max_num = 10)
  # URI.escape no longer exists in current Ruby; encode only the id value.
  enc_uri = format('/scholar?start=%d&cites=%s', start, URI.encode_www_form_component(id))
  papers = []
  Net::HTTP.start('scholar.google.com') do |http|
    s = http.request(Net::HTTP::Get.new(enc_uri)).body
    localizer = s.index('</b> citing <b>')
    # Without the "citing" marker there is no result list to read.
    next unless localizer

    # Keep only the text from 20 characters past the marker start, then drop
    # the bold tags Scholar uses to highlight matches inside titles.
    s = s[(localizer + 20)..-1].to_s.gsub(/<\/?b>/i, '')
    cursor = 0
    max_num.times do
      pos2 = s.index('</h3>', cursor)
      # Stop when the page runs out of headings. A heading in the first five
      # characters cannot hold a title and would make rindex count from the
      # end of the string.
      break unless pos2 && pos2 >= 5

      # Search backwards from just before the "</a>" preceding "</h3>" to
      # find the end of the opening tag; the title starts right after it.
      pos1 = s.rindex('>', pos2 - 5)
      break unless pos1

      papers << escape_title(s[pos1 + 1, pos2 - pos1 - 1])
      cursor = pos2 + 10
    end
  end
  papers
end
# Command-line entry point: searches Google Scholar for the paper title given
# as the first argument and prints the titles of the papers that cite it.
query = ARGV[0]
abort "usage: #{$0} \"paper title\"" if query.nil? || query.empty?

puts ' *** Googling paper "%s" *** ' % query
# Form-encode the quoted title so that characters such as "&" or "=" inside it
# cannot break the query string (URI.escape is gone from current Ruby).
enc_uri = '/scholar?' + URI.encode_www_form([['q', "\"#{query}\""], ['num', 1]])
Net::HTTP.start('scholar.google.com') do |http|
  s = http.request(Net::HTTP::Get.new(enc_uri)).body
  pos1 = s.index('Cited by ')
  if pos1
    # The citation count is the text between "Cited by " and the next "</a>".
    pos2 = s.index('</a>', pos1 + 9)
    citation_num = Integer(s[pos1 + 9, pos2 - pos1 - 9])
    # The id is cut from between the nearest preceding "cites" and the
    # following "amp" -- presumably a link of the form cites=<id>&amp;...
    pos3 = s.rindex('cites', pos1)
    pos4 = s.index('amp', pos3)
    citation_id = s[pos3 + 6, pos4 - pos3 - 7]
    puts " -- Google Scholar Citation ID: %s" % citation_id
    puts " -- Cited by the following %d papers:" % citation_num

    # Result pages hold 10 entries each: fetch every full page, then the
    # partial last page only when it actually has entries.
    full_pages, num_in_last_page = citation_num.divmod(10)
    papers = []
    full_pages.times do |page|
      papers.concat(get_citation_list(citation_id, page * 10))
    end
    if num_in_last_page > 0
      papers.concat(get_citation_list(citation_id, full_pages * 10, num_in_last_page))
    end

    papers.each_with_index do |title, i|
      puts "[#{i + 1}] #{title}"
    end
  else
    puts "no citation found!"
  end
end
Revision: 18903
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at October 11, 2009 23:14 by bingjian
Initial Code
#!/usr/local/bin/ruby
require 'net/http'
require 'uri'
# Fetches one page of the Google Scholar "cited by" listing and returns the
# link texts of the result headings found on it.
#
# id      - the Scholar citation id placed in the "cites" query parameter.
# start   - zero-based offset of the first result to request (default 0).
# max_num - maximum number of titles to read from the page (default 10).
#
# Returns an Array of title Strings exactly as they appear in the page. The
# array is shorter than max_num (possibly empty) when the page does not
# contain the expected marker or holds fewer results.
def get_citation_list(id, start = 0, max_num = 10)
  # URI.escape no longer exists in current Ruby; encode only the id value.
  enc_uri = format('/scholar?start=%d&cites=%s', start, URI.encode_www_form_component(id))
  papers = []
  Net::HTTP.start('scholar.google.com') do |http|
    s = http.request(Net::HTTP::Get.new(enc_uri)).body
    cursor = s.index('</b> citing <b>')
    # Without the "citing" marker there is no result list to read.
    next unless cursor

    max_num.times do
      pos2 = s.index('</a></h3>', cursor)
      # Stop as soon as the page runs out of result headings.
      break unless pos2

      # The title is the text between the end of the opening anchor tag and
      # the closing "</a></h3>".
      pos1 = s.rindex('>', pos2)
      break unless pos1

      papers << s[pos1 + 1, pos2 - pos1 - 1]
      cursor = pos2 + 10
    end
  end
  papers
end
# Command-line entry point: searches Google Scholar for the paper title given
# as the first argument and prints the titles of the papers that cite it.
query = ARGV[0]
abort "usage: #{$0} \"paper title\"" if query.nil? || query.empty?

puts ' *** Googling paper "%s" *** ' % query
# Form-encode the quoted title so that characters such as "&" or "=" inside it
# cannot break the query string (URI.escape is gone from current Ruby).
enc_uri = '/scholar?' + URI.encode_www_form([['q', "\"#{query}\""], ['num', 1]])
Net::HTTP.start('scholar.google.com') do |http|
  s = http.request(Net::HTTP::Get.new(enc_uri)).body
  pos1 = s.index('Cited by ')
  if pos1
    # The citation count is the text between "Cited by " and the next "</a>".
    pos2 = s.index('</a>', pos1 + 9)
    citation_num = Integer(s[pos1 + 9, pos2 - pos1 - 9])
    # The id is cut from between the nearest preceding "cites" and the
    # following "amp" -- presumably a link of the form cites=<id>&amp;...
    pos3 = s.rindex('cites', pos1)
    pos4 = s.index('amp', pos3)
    citation_id = s[pos3 + 6, pos4 - pos3 - 7]
    puts " -- Google Scholar Citation ID: %s" % citation_id
    puts " -- Cited by the following %d papers:" % citation_num

    # Result pages hold 10 entries each: fetch every full page, then the
    # partial last page only when it actually has entries.
    full_pages, num_in_last_page = citation_num.divmod(10)
    papers = []
    full_pages.times do |page|
      papers.concat(get_citation_list(citation_id, page * 10))
    end
    if num_in_last_page > 0
      papers.concat(get_citation_list(citation_id, full_pages * 10, num_in_last_page))
    end

    papers.each_with_index do |title, i|
      puts "[#{i + 1}] #{title}"
    end
  else
    puts "no citation found!"
  end
end
Initial URL
Initial Description
Initial Title
Extract citations from Google Scholar
Initial Tags
Initial Language
Ruby