trohit/get_mfdeltas.py

## get_mfdeltas.py
#!/usr/bin/python3
import requests
import urllib
import code
import os
from bs4 import BeautifulSoup
import pdb
import sys
# Sample portfolio page URL. The `__main__` block rebinds this name from the
# command line.
url = 'https://www.mutualfundindia.com/MF/Portfolio/Details?id=32144'
# When truthy, xprint() emits its trace output and get_raw_url_data() drops
# into pdb before scanning the converted text file.
is_debug_mode = False

def xprint(*args, **kwargs):
    """Print a debug trace line; a no-op unless ``is_debug_mode`` is truthy.

    The positional arguments are converted with ``str`` and joined with
    single spaces, prefixed with ``">> "`` and suffixed with ``"XXX"``.
    Keyword arguments are forwarded unchanged to ``print``.
    """
    if not is_debug_mode:
        return
    message = " ".join(str(item) for item in args)
    print(">> " + message + "XXX", **kwargs)

def get_stocks(url):
    """Fetch *url* and print the cells of the first table row on the page.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``.  Each child element of the first ``<tr>`` is printed
    twice: once as ``<index>:"<text>"`` (1-based) and once as the bare text.

    Args:
        url: Address of the page to download.

    Returns:
        A list of ``(cell_text, [])`` tuples, one per cell of the first row,
        or an empty list when the page contains no ``<tr>`` element.
    """
    page = requests.get(url)
    soup = BeautifulSoup(page.text, 'html.parser')

    # NOTE(review): bs4 treats the ``div=`` keyword as an attribute filter
    # (any tag carrying div="coltopHolding"), not as a <div> lookup.
    # Presumably ``soup.find('div', id='coltopHolding')`` or ``class_=`` was
    # meant -- confirm against the page markup.
    holding = soup.find(div='coltopHolding')
    # find() returns None when nothing matches; avoid AttributeError then.
    tables = holding.find_all_next('table') if holding is not None else []

    # The interactive console is a debugging aid, so it is only opened in
    # debug mode (same gating as pdb.set_trace() in get_raw_url_data).
    if is_debug_mode:
        code.interact(local=locals())

    # Rows are taken from the soup that is already parsed; the previous
    # implementation referenced an ``lh`` module that is never imported.
    tr_elements = soup.find_all('tr')
    if not tr_elements:
        return []

    col = []
    # Only direct child tags of the first row are its cells.
    header_cells = tr_elements[0].find_all(True, recursive=False)
    for i, cell in enumerate(header_cells, start=1):
        name = cell.get_text()
        print('%d:"%s"' % (i, name))
        col.append((name, []))
        print(name)
    return col

def get_raw_url_data(url):
    """Download *url*, convert it to text and collect the allocation lines.

    The page is fetched with ``curl`` into ``./raw.txt`` and converted with
    ``html2text`` into ``text.txt`` (both relative to the current directory).
    Collection starts after the first line containing "Percentage Allocation"
    and stops at the first line containing "Detailed Portfolio".  Within that
    section, blank lines, lines starting with "Equity" and lines ending in
    "%" are skipped; every other line is collected.

    Args:
        url: Address of the page to download.

    Returns:
        The list of collected lines.  The list is also printed when the
        "Detailed Portfolio" marker is reached.

    Raises:
        FileNotFoundError: If ``curl`` or ``html2text`` is not installed.
    """
    import subprocess  # function-scope import keeps this block self-contained

    # Argument lists bypass the shell, so URLs containing '&', ';' or quotes
    # reach curl intact instead of being interpreted as shell syntax.
    subprocess.run(["curl", "-s", url, "-o", "./raw.txt"])
    with open("text.txt", "wb") as converted:
        subprocess.run(["html2text", "raw.txt"], stdout=converted)

    collected = []
    in_allocation = False  # True while between the two section markers
    with open("text.txt", "rt") as f:
        xprint("Name of the file: ", f.name)
        if is_debug_mode:
            pdb.set_trace()
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.rstrip()
            xprint(str(line_no) + ":[" + line + "]")
            if "Percentage Allocation" in line:
                print("loi = True")
                in_allocation = True
            elif 'Detailed Portfolio' in line:
                xprint('detailed portfolio seen')
                xprint("loi = False")
                print(collected)
                # Return instead of exit() so importing callers keep running.
                return collected
            elif in_allocation:
                if not line:
                    # Blank lines carry no data (and used to crash on l[-1]).
                    continue
                if line.startswith('Equity'):
                    xprint('skip:equity seen')
                elif line.endswith('%'):
                    xprint('skip:% seen')
                else:
                    print("appending:" + line)
                    collected.append(line)
    # End marker never seen: hand back whatever was gathered.
    return collected


if __name__ == "__main__":
    # Use the URL given on the command line; without one, keep the
    # module-level default `url` instead of failing with IndexError.
    if len(sys.argv) > 1:
        url = sys.argv[1]
    print(url)
    get_raw_url_data(url)
    # get_stocks(url) is the BeautifulSoup-based alternative; currently disabled.
	#!/usr/bin/python3
	import requests
	import urllib
	import code
	import os
	from bs4 import BeautifulSoup
	import pdb
	import sys
	# Sample portfolio page URL. The `__main__` block rebinds this name from the
	# command line.
	url = 'https://www.mutualfundindia.com/MF/Portfolio/Details?id=32144'
	# When truthy, xprint() emits its trace output and get_raw_url_data() drops
	# into pdb before scanning the converted text file.
	is_debug_mode = False

	def xprint(*args, **kwargs):
	    """Print a debug trace line; a no-op unless ``is_debug_mode`` is truthy.

	    The positional arguments are converted with ``str`` and joined with
	    single spaces, prefixed with ``">> "`` and suffixed with ``"XXX"``.
	    Keyword arguments are forwarded unchanged to ``print``.
	    """
	    # The signature must be (*args, **kwargs): with (args, *kwargs) the
	    # ``**kwargs`` expansion below receives a tuple and raises TypeError.
	    if is_debug_mode:
	        print(">> " + " ".join(map(str, args)) + "XXX", **kwargs)

	def get_stocks(url):
	    """Fetch *url* and print the cells of the first table row on the page.

	    The page is downloaded with ``requests`` and parsed with BeautifulSoup's
	    ``html.parser``.  Each child element of the first ``<tr>`` is printed
	    twice: once as ``<index>:"<text>"`` (1-based) and once as the bare text.

	    Args:
	        url: Address of the page to download.

	    Returns:
	        A list of ``(cell_text, [])`` tuples, one per cell of the first row,
	        or an empty list when the page contains no ``<tr>`` element.
	    """
	    page = requests.get(url)
	    soup = BeautifulSoup(page.text, 'html.parser')

	    # NOTE(review): bs4 treats the ``div=`` keyword as an attribute filter
	    # (any tag carrying div="coltopHolding"), not as a <div> lookup.
	    # Presumably ``soup.find('div', id='coltopHolding')`` or ``class_=`` was
	    # meant -- confirm against the page markup.
	    holding = soup.find(div='coltopHolding')
	    # find() returns None when nothing matches; avoid AttributeError then.
	    tables = holding.find_all_next('table') if holding is not None else []

	    # The interactive console is a debugging aid, so it is only opened in
	    # debug mode (same gating as pdb.set_trace() in get_raw_url_data).
	    if is_debug_mode:
	        code.interact(local=locals())

	    # Rows are taken from the soup that is already parsed; the previous
	    # implementation referenced an ``lh`` module that is never imported.
	    tr_elements = soup.find_all('tr')
	    if not tr_elements:
	        return []

	    col = []
	    # Only direct child tags of the first row are its cells.
	    header_cells = tr_elements[0].find_all(True, recursive=False)
	    for i, cell in enumerate(header_cells, start=1):
	        name = cell.get_text()
	        print('%d:"%s"' % (i, name))
	        col.append((name, []))
	        print(name)
	    return col

	def get_raw_url_data(url):
	    """Download *url*, convert it to text and collect the allocation lines.

	    The page is fetched with ``curl`` into ``./raw.txt`` and converted with
	    ``html2text`` into ``text.txt`` (both relative to the current directory).
	    Collection starts after the first line containing "Percentage Allocation"
	    and stops at the first line containing "Detailed Portfolio".  Within that
	    section, blank lines, lines starting with "Equity" and lines ending in
	    "%" are skipped; every other line is collected.

	    Args:
	        url: Address of the page to download.

	    Returns:
	        The list of collected lines.  The list is also printed when the
	        "Detailed Portfolio" marker is reached.

	    Raises:
	        FileNotFoundError: If ``curl`` or ``html2text`` is not installed.
	    """
	    import subprocess  # function-scope import keeps this block self-contained

	    # Argument lists bypass the shell, so URLs containing '&', ';' or quotes
	    # reach curl intact instead of being interpreted as shell syntax.
	    subprocess.run(["curl", "-s", url, "-o", "./raw.txt"])
	    with open("text.txt", "wb") as converted:
	        subprocess.run(["html2text", "raw.txt"], stdout=converted)

	    collected = []
	    in_allocation = False  # True while between the two section markers
	    with open("text.txt", "rt") as f:
	        xprint("Name of the file: ", f.name)
	        if is_debug_mode:
	            pdb.set_trace()
	        for line_no, raw_line in enumerate(f, start=1):
	            line = raw_line.rstrip()
	            xprint(str(line_no) + ":[" + line + "]")
	            if "Percentage Allocation" in line:
	                print("loi = True")
	                in_allocation = True
	            elif 'Detailed Portfolio' in line:
	                xprint('detailed portfolio seen')
	                xprint("loi = False")
	                print(collected)
	                # Return instead of exit() so importing callers keep running.
	                return collected
	            elif in_allocation:
	                if not line:
	                    # Blank lines carry no data (and used to crash on l[-1]).
	                    continue
	                if line.startswith('Equity'):
	                    xprint('skip:equity seen')
	                elif line.endswith('%'):
	                    xprint('skip:% seen')
	                else:
	                    print("appending:" + line)
	                    collected.append(line)
	    # End marker never seen: hand back whatever was gathered.
	    return collected


	if __name__ == "__main__":
	    # Use the URL given on the command line; without one, keep the
	    # module-level default `url` instead of failing with IndexError.
	    if len(sys.argv) > 1:
	        url = sys.argv[1]
	    print(url)
	    get_raw_url_data(url)
	    # get_stocks(url) is the BeautifulSoup-based alternative; currently disabled.