f9n/python3feedfinder

## python3feedfinder
#!/usr/local/bin/python3.6
import requests
import feedparser
from urllib.parse import urljoin
from lxml import html

def findfeedWithLxml(site):
    """Return the feed URLs advertised by the page at *site*.

    The page is downloaded and searched for feed candidates in two places:
    ``<link rel="alternate">`` elements whose ``type`` mentions "rss" or
    "xml", and ``<a>`` elements whose ``href`` contains "xml", "rss" or
    "feed".  Every candidate is resolved against *site*, parsed with
    feedparser, and kept only if the parsed result has at least one entry.

    Args:
        site: URL of the page to inspect; also the base for relative links.

    Returns:
        A list of unique absolute feed URLs in the order they were found
        (link elements first, then anchors).  Empty if nothing qualifies.
    """
    raw = requests.get(site).content
    # lxml refuses to parse an empty document, so there is nothing to scan.
    if not raw.strip():
        return []
    tree = html.fromstring(raw)

    candidates = []

    for link in tree.xpath("//link[@rel='alternate']"):
        # .get() yields None for a missing attribute; many alternate links
        # (e.g. hreflang variants) carry no type at all.
        mime = (link.get('type') or '').lower()
        href = link.get('href')
        if href and ("rss" in mime or "xml" in mime):
            # Feed links are frequently relative; make them absolute.
            candidates.append(urljoin(site, href))

    for anchor in tree.xpath("//a"):
        href = anchor.get('href')
        if href and any(hint in href for hint in ("xml", "rss", "feed")):
            candidates.append(urljoin(site, href))

    result = []
    seen = set()
    for url in candidates:
        # Skip duplicates up front so each URL is fetched at most once.
        if url in seen:
            continue
        seen.add(url)
        # Only URLs that actually parse into a feed with entries count.
        if feedparser.parse(url).entries:
            result.append(url)

    return result

if __name__ == '__main__':
    # Script entry point: look up the feeds of a sample site and print them.
    feeds = findfeedWithLxml('http://www.bahcesehir.edu.tr/')
    print(feeds)
	#!/usr/local/bin/python3.6
	import requests
	import feedparser
	from urllib.parse import urljoin
	from lxml import html

	def findfeedWithLxml(site):
	    """Return the feed URLs advertised by the page at *site*.

	    The page is downloaded and searched for feed candidates in two places:
	    ``<link rel="alternate">`` elements whose ``type`` mentions "rss" or
	    "xml", and ``<a>`` elements whose ``href`` contains "xml", "rss" or
	    "feed".  Every candidate is resolved against *site*, parsed with
	    feedparser, and kept only if the parsed result has at least one entry.

	    Args:
	        site: URL of the page to inspect; also the base for relative links.

	    Returns:
	        A list of unique absolute feed URLs in the order they were found
	        (link elements first, then anchors).  Empty if nothing qualifies.
	    """
	    raw = requests.get(site).content
	    # lxml refuses to parse an empty document, so there is nothing to scan.
	    if not raw.strip():
	        return []
	    tree = html.fromstring(raw)

	    candidates = []

	    for link in tree.xpath("//link[@rel='alternate']"):
	        # .get() yields None for a missing attribute; many alternate links
	        # (e.g. hreflang variants) carry no type at all.
	        mime = (link.get('type') or '').lower()
	        href = link.get('href')
	        if href and ("rss" in mime or "xml" in mime):
	            # Feed links are frequently relative; make them absolute.
	            candidates.append(urljoin(site, href))

	    for anchor in tree.xpath("//a"):
	        href = anchor.get('href')
	        if href and any(hint in href for hint in ("xml", "rss", "feed")):
	            candidates.append(urljoin(site, href))

	    result = []
	    seen = set()
	    for url in candidates:
	        # Skip duplicates up front so each URL is fetched at most once.
	        if url in seen:
	            continue
	        seen.add(url)
	        # Only URLs that actually parse into a feed with entries count.
	        if feedparser.parse(url).entries:
	            result.append(url)

	    return result

	if __name__ == '__main__':
	    # Script entry point: look up the feeds of a sample site and print them.
	    print(findfeedWithLxml('http://www.bahcesehir.edu.tr/'))