Skip to content

Instantly share code, notes, and snippets.

@tannerjt
Created April 5, 2017 21:12
Show Gist options
  • Save tannerjt/9314639fe0bc943e299ad3f751aead5d to your computer and use it in GitHub Desktop.
Save tannerjt/9314639fe0bc943e299ad3f751aead5d to your computer and use it in GitHub Desktop.
Remove all HTML from fields in an Esri feature class
import os
import urllib
import urllib.parse

from bs4 import BeautifulSoup
# Strip URL-encoding and HTML markup from a set of text fields on the first
# layer of the first map in the currently open ArcGIS Pro project.
#
# NOTE(review): `arcpy` is not imported in this file; the script presumably
# runs in the ArcGIS Pro Python window, where it is already available.


def _strip_html(value):
    """Return *value* URL-decoded with all HTML markup removed.

    Text nodes of the parsed document are joined with single spaces.
    Values that cannot be URL-decoded (``urllib.parse.unquote`` raises
    ``TypeError``, e.g. for ``None``) yield an empty string.
    """
    try:
        decoded = urllib.parse.unquote(value)
    except TypeError:
        return ""
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(decoded, "html.parser")
    # Build the cleaned strings directly: replacing nodes inside the soup
    # does not change the strings already returned by find_all().
    # "&nbsp;" is removed before "&nbsp" so no stray ";" is left behind.
    cleaned = [
        node.replace("&nbsp;", "").replace("&nbsp", "").replace(u"\xa0", u" ")
        for node in soup.find_all(text=True)
    ]
    return " ".join(cleaned)


mxd = arcpy.mp.ArcGISProject('current')
first_map = mxd.listMaps()[0]
layer = first_map.listLayers()[0]
fields = ["title", "standardPlace", "content", "actor", "reflection", "contactName"]

# The edit session is opened on the folder/geodatabase containing the layer.
edit = arcpy.da.Editor(os.path.dirname(layer.dataSource))
# Positional arguments are (with_undo, multiuser_mode) per the Editor API.
edit.startEditing(False, True)
try:
    with arcpy.da.UpdateCursor(layer, fields) as cursor:
        for row in cursor:
            cleaned_row = [_strip_html(value) for value in row]
            # Each row update is wrapped in its own edit operation.
            edit.startOperation()
            try:
                cursor.updateRow(cleaned_row)
            except Exception:
                edit.abortOperation()
                raise
            edit.stopOperation()
except Exception:
    # Discard pending edits and close the session instead of leaving it open.
    edit.stopEditing(False)
    raise
else:
    edit.stopEditing(True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment