Skip to content

Instantly share code, notes, and snippets.

@TwiN
Last active August 22, 2022 22:44
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save TwiN/8e8f928e5b0431e96b60d2caca40556d to your computer and use it in GitHub Desktop.
Python equivalent of PHP's file_get_contents on websites (NOT LOCAL FILES)
# Python 2's urllib2/cookielib were renamed in Python 3.
import urllib.request
import http.cookiejar  # kept for parity with the original import; unused here


def file_get_contents(url):
    """Fetch *url* over HTTP(S) and return the response body as bytes.

    Python equivalent of PHP's file_get_contents for remote URLs
    (NOT local files).

    @param url: the URL to fetch; spaces are replaced with '+' as a
        convenience (full percent-encoding is the caller's job).
    @return: the response body (bytes), or b'' if the server answered
        with an HTTP error status (the error body is printed).
    """
    url = str(url).replace(" ", "+")  # just in case, no space in url
    # Browser-like headers: some sites reject the default Python user agent.
    hdr = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive',
    }
    req = urllib.request.Request(url, headers=hdr)
    try:
        # Context manager closes the connection even if read() raises.
        with urllib.request.urlopen(req) as page:
            return page.read()
    except urllib.error.HTTPError as e:
        print(e.fp.read())
        return b''  # bytes, to match the success path's return type


# example — guarded so importing this module does not trigger a request
if __name__ == "__main__":
    print(file_get_contents("https://twinnation.org/api/v1/ip"))
@phuctvt
Copy link

phuctvt commented Mar 2, 2017

Thank you, this file helped me! :)

@talkleasy
Copy link

Thanks.

@frederikbrammer
Copy link

frederikbrammer commented Feb 19, 2021

Updated for Python 3.*:

# some packages were renamed in Python 3
import urllib.request as urllib2
import http.cookiejar as cookielib


def file_get_contents(url):
    """Fetch *url* over HTTP(S) and return the response body as bytes.

    Python equivalent of PHP's file_get_contents for remote URLs
    (NOT local files).

    @param url: the URL to fetch; spaces are replaced with '+' as a
        convenience (full percent-encoding is the caller's job).
    @return: the response body (bytes), or b'' if the server answered
        with an HTTP error status (the error body is printed).
    """
    url = str(url).replace(" ", "+") # just in case, no space in url
    # Browser-like headers: some sites reject the default Python user agent.
    hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
           'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
           'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
           'Accept-Encoding': 'none',
           'Accept-Language': 'en-US,en;q=0.8',
           'Connection': 'keep-alive'}
    req = urllib2.Request(url, headers=hdr)
    try:
        # Context manager closes the socket deterministically (the original
        # leaked the response object).
        with urllib2.urlopen(req) as page:
            return page.read()
    except urllib2.HTTPError as e:  # HTTPError is re-exported by urllib.request
        print(e.fp.read())
    return b''  # bytes, to match the success path's return type

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment