@trohit
Created February 10, 2020 04:59
get_mfdeltas.py
#!/usr/bin/python3
import requests
import urllib
import code
import os
from bs4 import BeautifulSoup
import lxml.html as lh  # needed for lh.fromstring() in get_stocks()
import pdb
import sys
url = 'https://www.mutualfundindia.com/MF/Portfolio/Details?id=32144'
is_debug_mode = False
def xprint(*args, **kwargs):
    # Debug-only print helper: prefixes output with ">> " and appends an "XXX" marker
    if is_debug_mode:
        print(">> " + " ".join(map(str, args)) + "XXX", **kwargs)
def get_stocks(url):
    page = requests.get(url)
    # Create a BeautifulSoup object
    soup = BeautifulSoup(page.text, 'html.parser')
    # Pull the top-holdings container div
    # (assumption: 'coltopHolding' is an id; use class_= instead if it is a CSS class)
    text = soup.find('div', id='coltopHolding')
    # Drop into an interactive shell for debugging
    code.interact(local=locals())
    # Pull all <table> elements that follow the holdings div
    tables = text.find_all_next('table')
    # Alternative fetch via urllib, kept for reference:
    # contents = urllib.request.urlopen(url).read()
    # print("fetched data from: " + url)
    code.interact(local=locals())
    # Store the contents of the website under doc
    doc = lh.fromstring(page.content)
    # Parse data stored between <tr>..</tr> tags of the HTML
    tr_elements = doc.xpath('//tr')
    # Check the length of the first 5 rows
    [len(T) for T in tr_elements[:5]]
    # Create empty list of (header, values) pairs
    col = []
    i = 0
    # For each cell in the first row, store the header text and an empty list
    for t in tr_elements[0]:
        i += 1
        name = t.text_content()
        print('%d:"%s"' % (i, name))
        col.append((name, []))
        print(name)
def get_raw_url_data(url):
    ll = []
    # Fetch the page with curl and convert it to plain text with html2text
    cmd = "curl -s '" + url + "' -o ./raw.txt"
    os.system(cmd)
    cmd2 = "html2text raw.txt > text.txt"
    os.system(cmd2)
    f = open("text.txt", "rt")
    xprint("Name of the file: ", f.name)
    i = 0
    if is_debug_mode:
        pdb.set_trace()
    # is_loi flags that we are inside the "Percentage Allocation" list of interest
    is_loi = False
    for l in f:
        l = l.rstrip()
        i = i + 1
        xprint(str(i) + ":[" + l + "]")
        if "Percentage Allocation" in l:
            print("loi = True")
            is_loi = True
            continue
        elif 'Detailed Portfolio' in l:
            xprint('detailed portfolio seen')
            is_loi = False
            xprint("loi = False")
            print(ll)
            sys.exit()
        elif is_loi:
            if l[:6] == 'Equity':
                xprint('skip:equity seen')
                continue
            elif l.endswith('%'):
                xprint('skip:% seen')
                continue
            else:
                print("appending:" + l)
                ll.append(l)
    f.close()
if __name__ == "__main__":
url = sys.argv[1]
print(url)
get_raw_url_data(url)
#get_stocks(url)
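
# Example invocation (a sketch; assumes curl and html2text are installed and on
# PATH, and that the target page contains a "Percentage Allocation" section
# followed by a "Detailed Portfolio" section):
#
#   ./get_mfdeltas.py 'https://www.mutualfundindia.com/MF/Portfolio/Details?id=32144'
#
# The script dumps the fetched HTML to ./raw.txt, its text rendering to
# ./text.txt, and prints the list of holdings found between the two markers.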