Resources
require 'net/http'
require 'uri'
# Fetches the body of an HTTP or HTTPS resource with a single GET request.
#
# Redirects are NOT followed: any status other than 200 (including 301/302)
# is reported as an error.
#
# @param requested_url [String] absolute http:// or https:// URL
# @return [String] the response body
# @raise [ArgumentError] if requested_url is not an absolute HTTP(S) URL
# @raise [URI::InvalidURIError] if requested_url cannot be parsed at all
# @raise [RuntimeError] if the response status code is not 200
def get_html_content(requested_url)
  url = URI.parse(requested_url)
  # URI::HTTPS is a subclass of URI::HTTP, so this accepts both schemes and
  # rejects relative references, mailto:, ftp:, etc. before any I/O happens.
  unless url.is_a?(URI::HTTP) && url.host
    raise ArgumentError, "Not an absolute HTTP(S) URL: #{requested_url.inspect}"
  end

  # request_uri joins path and query and substitutes "/" for an empty path;
  # Net::HTTP::Get.new raises ArgumentError when handed an empty path.
  the_request = Net::HTTP::Get.new(url.request_uri)
  # Without use_ssl an https:// URL would be spoken to in plain HTTP on port 443.
  the_response = Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http|
    http.request(the_request)
  end
  raise "Response was not 200, response was #{the_response.code}" if the_response.code != "200"

  the_response.body
end
# Usage examples for get_html_content; each call issues a live HTTP request.
# URL with no path component. Net::HTTP::Get.new raises
# "ArgumentError: HTTP request path is empty" for an empty path, so this call
# fails unless get_html_content substitutes "/" for the missing path.
s = get_html_content("http://www.google.com")
# these URLs carry an explicit path (and optionally a query string), so they should be fine
s = get_html_content("http://www.google.com/")
s = get_html_content("http://github.com/search?q=http")
# get_html_content does not follow redirects; it raises an exception for any non-200 response
s = get_html_content("http://www.yahoo.com/") # http 302
No comments:
Post a Comment