Created
February 10, 2020 04:59
-
-
Save trohit/d79152c93c695b100da85514ee0ff523 to your computer and use it in GitHub Desktop.
get_mfdeltas.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import requests | |
import urllib | |
import code | |
import os | |
from bs4 import BeautifulSoup | |
import pdb | |
import sys | |
# Example portfolio page URL; the script entry point takes the URL to
# process from sys.argv.
url = 'https://www.mutualfundindia.com/MF/Portfolio/Details?id=32144'
# When True, xprint() emits its trace output and get_raw_url_data() drops
# into pdb before it starts scanning the converted text.
is_debug_mode = False
def xprint(*args, **kwargs):
    """Print a debug trace line, but only when ``is_debug_mode`` is set.

    The positional arguments are converted with ``str`` and joined with
    single spaces; the result is prefixed with ``">> "`` and suffixed with
    ``"XXX"``. Keyword arguments are forwarded to ``print`` unchanged.
    """
    if not is_debug_mode:
        return
    body = " ".join(str(arg) for arg in args)
    print(">> " + body + "XXX", **kwargs)
def get_stocks(url):
    """Fetch *url* and collect the header cells of its first table row.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    Each direct child cell of the first ``<tr>`` in the document is printed
    as ``index:"text"`` (1-based) and recorded as a column.

    Args:
        url: Address of the page to download.

    Returns:
        A list of ``(header_text, [])`` tuples, one per cell of the first
        table row, in document order. The list is empty when the page
        contains no ``<tr>`` element.
    """
    page = requests.get(url)
    soup = BeautifulSoup(page.text, 'html.parser')

    # NOTE(review): the holdings section is presumably marked
    # 'coltopHolding' -- confirm whether that is an id or a class before
    # narrowing the row search to it. Until then the whole document is
    # searched.
    first_row = soup.find('tr')
    if first_row is None:
        return []

    columns = []
    # recursive=False restricts the walk to the row's own cells so that
    # cells of nested tables are not reported as extra headers.
    cells = first_row.find_all(True, recursive=False)
    for index, cell in enumerate(cells, start=1):
        name = cell.get_text()
        print('%d:"%s"' % (index, name))
        # Each header starts with an empty list for its column values.
        columns.append((name, []))
    return columns
def get_raw_url_data(url):
    """Download *url*, convert it to plain text and extract holding names.

    The page is fetched with ``curl`` into ``./raw.txt`` and converted with
    ``html2text`` into ``text.txt`` (both in the current directory). The
    text is then scanned line by line: collection starts after a line
    containing ``"Percentage Allocation"`` and stops at the first line
    containing ``"Detailed Portfolio"``. Within that section, blank lines,
    lines starting with ``"Equity"`` and lines ending in ``"%"`` are
    skipped; every other line is collected.

    Args:
        url: Address of the page to download.

    Returns:
        The list of collected lines (trailing whitespace stripped). The
        list is also printed when the ``"Detailed Portfolio"`` marker is
        reached.

    Raises:
        FileNotFoundError: If ``curl`` or ``html2text`` is not installed.
    """
    # Imported locally because the module's import block does not provide it.
    import subprocess

    # Argument lists (no shell) so that URLs containing '&', ';', '?' or
    # spaces reach curl intact instead of being interpreted by the shell.
    # Non-zero exit codes are ignored, as in a plain best-effort fetch.
    subprocess.run(["curl", "-s", url, "-o", "./raw.txt"], check=False)
    with open("text.txt", "wb") as converted:
        subprocess.run(["html2text", "raw.txt"], stdout=converted, check=False)

    holdings = []
    in_allocation = False  # True while inside the allocation section
    with open("text.txt", "rt") as f:
        xprint("Name of the file: ", f.name)
        if is_debug_mode:
            pdb.set_trace()
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.rstrip()
            xprint(str(line_no) + ":[" + line + "]")
            if "Percentage Allocation" in line:
                print("loi = True")
                in_allocation = True
            elif 'Detailed Portfolio' in line:
                # End of the section of interest: report and stop scanning.
                xprint('detailed portfolio seen')
                xprint("loi = False")
                print(holdings)
                break
            elif in_allocation:
                if not line:
                    # Blank separator lines carry no holding name.
                    continue
                if line.startswith('Equity'):
                    xprint('skip:equity seen')
                elif line.endswith('%'):
                    xprint('skip:% seen')
                else:
                    print("appending:" + line)
                    holdings.append(line)
    return holdings
if __name__ == "__main__":
    # Use the URL given on the command line; when none is supplied, keep
    # the module-level default `url` instead of failing with IndexError.
    if len(sys.argv) > 1:
        url = sys.argv[1]
    print(url)
    get_raw_url_data(url)
    #get_stocks(url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment