Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/usr/bin/env python
import csv
from xml.etree import ElementTree
from langdetect import detect_langs
from requests_html import HTMLSession
# Single HTML session shared by langs() and urls() for every HTTP request.
http = HTMLSession()
def langs(url):
    """Detect the languages used in the main section of the page at *url*.

    The page is fetched with the module-level ``http`` session, its first
    ``section.main`` element is selected, and ``detect_langs`` is run on
    that element's text.

    Returns a dict mapping each detected language code to its probability.
    An empty dict is returned when the page has no ``section.main`` element
    or that element contains no text, so callers using ``.get(code, 0)``
    record zeros for the page instead of aborting the whole crawl.
    """
    resp = http.get(url)
    main = resp.html.find('section.main', first=True)
    # find(..., first=True) gives None when nothing matches the selector.
    if main is None:
        return {}
    text = main.text
    # Language detection cannot work on blank input; skip it explicitly.
    if not text or not text.strip():
        return {}
    return {guess.lang: guess.prob for guess in detect_langs(text)}
def urls():
    """Yield the article URLs listed in the usbeketrica.com sitemap.

    Downloads ``https://usbeketrica.com/sitemap.xml`` with the module-level
    ``http`` session, walks every ``<loc>`` element in the sitemap 0.84
    namespace, and yields those whose text starts with
    ``https://usbeketrica.com/article/``.

    Surrounding whitespace in a ``<loc>`` value is stripped before matching,
    and empty ``<loc/>`` elements are skipped.
    """
    sitemap = http.get('https://usbeketrica.com/sitemap.xml')
    doc = ElementTree.fromstring(sitemap.text)
    for loc in doc.findall('.//{http://www.google.com/schemas/sitemap/0.84}loc'):
        # An empty <loc/> has text None; pretty-printed sitemaps may pad the
        # URL with whitespace. Normalise both before testing the prefix.
        address = (loc.text or '').strip()
        if address.startswith('https://usbeketrica.com/article/'):
            yield address
# Crawl every article URL and record the detected French/English
# probabilities in langs.csv, echoing each result to stdout as it goes.
#
# The file is opened in a ``with`` block so it is flushed and closed even if
# a request or detection fails part-way through, and with newline='' as the
# csv module requires (otherwise extra blank lines appear on Windows).
with open('langs.csv', 'w', newline='') as csv_file:
    output = csv.writer(csv_file)
    output.writerow(['url', 'fr', 'en'])
    for url in urls():
        results = langs(url)
        print(url, results)
        # Languages that were not detected are recorded as probability 0.
        output.writerow([url, results.get('fr', 0), results.get('en', 0)])
@WaliyaYohannaJoseph

This comment has been minimized.

Copy link

commented Jun 10, 2019

It is working for me.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.