Skip to content

Instantly share code, notes, and snippets.

@MichelleDalalJian
Created November 24, 2017 16:40
Show Gist options
  • Save MichelleDalalJian/f587530b6e0a72357541f39b2022aa55 to your computer and use it in GitHub Desktop.
Save MichelleDalalJian/f587530b6e0a72357541f39b2022aa55 to your computer and use it in GitHub Desktop.
Extracting Data from XML: The program will prompt for a URL, read the XML data from that URL using urllib, then parse the XML, extract the comment counts, and compute the sum of the numbers in the file.
from urllib import request
import xml.etree.ElementTree as ET
DEFAULT_URL = 'http://python-data.dr-chuck.net/comments_24966.xml'


def summarize_counts(xml_data):
    """Return ``(comment_count, total)`` for the comments in *xml_data*.

    *xml_data* is an XML document (``str`` or ``bytes``) whose root element
    contains ``comments/comment`` elements, each with an integer ``<count>``
    child. A comment that has no ``<count>`` child contributes 0 to the total.

    Raises ``xml.etree.ElementTree.ParseError`` when the document is not
    well-formed and ``ValueError`` when a count is not an integer.
    """
    tree = ET.fromstring(xml_data)
    comments = tree.findall('comments/comment')
    # The counts are whole numbers: summing them as int keeps the printed
    # total free of a trailing ".0".
    total = sum(int(c.findtext('count', default='0')) for c in comments)
    return len(comments), total


if __name__ == '__main__':
    # Prompt for the location; pressing Enter keeps the sample data set.
    url = input('Enter location: ').strip() or DEFAULT_URL
    print("Retrieving", url)
    # The context manager closes the HTTP response even if read() fails.
    with request.urlopen(url) as response:
        data = response.read()
    print("Retrieved", len(data), "characters")
    icount, isum = summarize_counts(data)
    print(icount)
    print(isum)
@xkkkkk2020
Copy link

That works! Thanks!

@LOKAKSHAI
Copy link

Thanks

@MaxiGiando
Copy link

MY ANSWER

import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET
import ssl
def total_comment_count(xml_data):
    """Return the sum of the ``<count>`` values of all comments in *xml_data*.

    *xml_data* is an XML document (``str`` or ``bytes``); the values are read
    from the ``<count>`` child of every ``comments/comment`` element.

    Raises ``xml.etree.ElementTree.ParseError`` for malformed XML,
    ``AttributeError`` when a comment has no ``<count>`` child, and
    ``ValueError`` when a count is not an integer.
    """
    tree = ET.fromstring(xml_data)
    # Accumulate in ``total`` rather than ``sum`` so the builtin stays usable.
    total = 0
    for item in tree.findall('comments/comment'):
        total += int(item.find('count').text)
    return total


if __name__ == "__main__":
    # Ignore SSL certificate errors.
    # NOTE(review): this disables certificate verification for any https URL;
    # acceptable for the course exercise, not for production code.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    url = "http://py4e-data.dr-chuck.net/comments_1659064.xml"

    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url, context=ctx) as uh:
        data = uh.read()
    print("Total:", total_comment_count(data))

@ShuckZ77
Copy link

ShuckZ77 commented May 9, 2023

import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET
import ssl
import re

# Ignore SSL certificate errors

def comment_counts(xml_text):
    """Return a list of ``(name, count)`` pairs, one per comment in *xml_text*.

    *xml_text* is an XML document (``str`` or ``bytes``). Each
    ``comments/comment`` element supplies the text of its ``<name>`` child and
    the integer value of its ``<count>`` child, in document order.

    Raises ``xml.etree.ElementTree.ParseError`` for malformed XML,
    ``AttributeError`` when a comment lacks ``<name>`` or ``<count>``, and
    ``ValueError`` when a count is not an integer.
    """
    tree = ET.fromstring(xml_text)
    pairs = []
    for item in tree.findall('comments/comment'):
        name = item.find('name').text
        kount = item.find('count').text
        pairs.append((name, int(kount)))
    return pairs


if __name__ == "__main__":
    # NOTE(review): certificate verification is switched off for https URLs;
    # fine for the course exercise, not for production code.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    url = input('ENTER URL : ')  # e.g. http://py4e-data.dr-chuck.net/comments_1692183.xml

    # ``response`` (not ``socket``) avoids shadowing the stdlib module name;
    # the context manager closes the connection once the body is read.
    with urllib.request.urlopen(url, context=ctx) as response:
        data2 = response.read().decode()

    entries = comment_counts(data2)
    print(len(entries))

    lst = []
    for name, kount in entries:
        print(name)
        print(kount)
        # Appending inside the loop collects every count, not just the last.
        lst.append(kount)

    print(lst)

    sumation = sum(lst)

    print(sumation)

@alghamdiim
Copy link

alghamdiim commented May 12, 2023

MY ANSWER

import ssl
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET


def total_comment_count(xml_data):
    """Return the sum of the ``<count>`` values of all comments in *xml_data*.

    *xml_data* is an XML document (``str`` or ``bytes``); the values are read
    from the ``<count>`` child of every ``comments/comment`` element.

    Raises ``xml.etree.ElementTree.ParseError`` for malformed XML,
    ``AttributeError`` when a comment has no ``<count>`` child, and
    ``ValueError`` when a count is not an integer.
    """
    tree = ET.fromstring(xml_data)
    # Accumulate in ``total`` rather than ``sum`` so the builtin stays usable.
    total = 0
    for item in tree.findall('comments/comment'):
        total += int(item.find('count').text)
    return total


if __name__ == "__main__":
    # Ignore SSL certificate errors.
    # NOTE(review): this disables certificate verification for any https URL;
    # acceptable for the course exercise, not for production code.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    url = "http://py4e-data.dr-chuck.net/comments_1659064.xml"

    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url, context=ctx) as uh:
        data = uh.read()
    print("Total:", total_comment_count(data))

I had a traceback:
"Traceback (most recent call last):
File "/Users/ibraheemalghamdi/Desktop/py4e/xml/xml.py", line 2, in
import xml.etree.ElementTree as ET
File "/Users/ibraheemalghamdi/Desktop/py4e/xml/xml.py", line 2, in
import xml.etree.ElementTree as ET
ModuleNotFoundError: No module named 'xml.etree'; 'xml' is not a package"

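Edit: the problem was the file name. A script called xml.py shadows the standard-library xml package, so `import xml.etree.ElementTree` ends up importing the script itself. I renamed it from xml.py to week5.py and it worked. Thanks!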

@YoussefShehadeh
Copy link

YoussefShehadeh commented Jun 4, 2023

Why do I keep getting this error?
('NoneType' object has no attribute 'text')
import urllib.request, urllib.parse, urllib.error
import ssl
import xml.etree.ElementTree as ET
def count_and_sum(xml_text):
    """Return ``(number_of_counts, total)`` for every ``<count>`` in *xml_text*.

    *xml_text* is an XML document (``str`` or ``bytes``). ``findall('.//count')``
    already yields the ``<count>`` elements themselves, so each value is read
    from ``element.text`` directly; calling ``find('count')`` on such an
    element returns ``None`` because a ``<count>`` has no ``<count>`` child.

    Raises ``xml.etree.ElementTree.ParseError`` for malformed XML and
    ``ValueError`` when a count is not an integer.
    """
    tree = ET.fromstring(xml_text)
    counts = tree.findall('.//count')
    total = 0
    for element in counts:
        # The text is a string; convert before adding to the running total.
        total += int(element.text)
    return len(counts), total


if __name__ == "__main__":
    # NOTE(review): certificate verification is switched off for https URLs;
    # fine for the course exercise, not for production code.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    Value = input('Enter location: ')
    print('Retrieving', Value)
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(Value, context=ctx) as uh:
        data = uh.read().decode()
    print('Retrieved', len(data), 'characters')

    counter, total = count_and_sum(data)
    print(counter)
    print(total)

@Perziver
Copy link

i am getting error :-

Traceback (most recent call last):
File "/Users/shantanusoni/Documents/ImpDoc/shekhar/studie_related/Coding_Culture/Python/python-coursera/example.py", line 2, in
import xml.etree.ElementTree as ET
File "/Users/shantanusoni/Documents/ImpDoc/shekhar/studie_related/Coding_Culture/Python/python-coursera/xml.py", line 2, in
import xml.etree.ElementTree as ET
ModuleNotFoundError: No module named 'xml.etree'; 'xml' is not a package

Plz help me

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment