Skip to content

Instantly share code, notes, and snippets.

@CoMPaTech
Forked from simonw/gist:104413
Created March 20, 2018 21:18
Show Gist options
  • Save CoMPaTech/f42a306dda106809f20d1cc6f8c16584 to your computer and use it in GitHub Desktop.
Save CoMPaTech/f42a306dda106809f20d1cc6f8c16584 to your computer and use it in GitHub Desktop.
Turn a BeautifulSoup form into a dict of fields and default values - useful for screen-scraping forms and then resubmitting them
def extract_form_fields(self, soup):
    """Turn a BeautifulSoup form into a dict of fields and default values.

    Args:
        soup: A parsed form (or any tag containing form controls). It must
            offer ``findAll(tag_name)``; every returned tag must offer
            ``get(attr, default)``, item access and ``.string``.

    Returns:
        dict mapping each named control to its default value: a string for
        inputs, textareas and single selects, and a list of strings for
        ``<select multiple>``. Controls with a missing or empty ``name``
        are left out.

    Raises:
        AssertionError: if a named ``<input>`` has a type that cannot be
            represented as a default value (for example ``file``).
    """
    missing = object()

    def has_attr(tag, attr):
        # Go through get() with a sentinel instead of has_key(): it needs
        # no parser-version-specific method and still detects valueless
        # attributes such as ``checked`` or ``selected``.
        return tag.get(attr, missing) is not missing

    def option_value(option):
        # An <option> without a value attribute submits its text instead.
        if has_attr(option, 'value'):
            return option.get('value') or ''
        return (option.string or '').strip()

    single_value_types = frozenset((
        'text', 'hidden', 'password', 'submit', 'image',
        # HTML5 textual controls are plain name/value pairs as well.
        'color', 'date', 'datetime-local', 'email', 'month', 'number',
        'range', 'search', 'tel', 'time', 'url', 'week',
    ))

    fields = {}

    for control in soup.findAll('input'):
        name = control.get('name')
        # Controls without a name are never part of a submission.
        if not name:
            continue
        # A missing type attribute means a text input.
        control_type = (control.get('type') or 'text').lower()

        # Plain push buttons carry no data.
        if control_type in ('button', 'reset'):
            continue

        # single element name/value fields
        if control_type in single_value_types:
            fields[name] = control.get('value') or ''
            continue

        # checkboxes and radios
        if control_type in ('checkbox', 'radio'):
            value = ''
            if has_attr(control, 'checked'):
                value = control.get('value', 'on')
            # A checked member of a group wins over an earlier unchecked
            # one; an unchecked member only fills a slot that is still empty.
            if value or name not in fields:
                fields[name] = value
            continue

        # Explicit raise rather than ``assert`` so the check survives -O.
        raise AssertionError(
            'input type %s not supported' % control['type'])

    # textareas
    for textarea in soup.findAll('textarea'):
        name = textarea.get('name')
        if name:
            fields[name] = textarea.string or ''

    # select fields
    for select in soup.findAll('select'):
        name = select.get('name')
        if not name:
            continue
        options = select.findAll('option')
        selected = [opt for opt in options if has_attr(opt, 'selected')]
        # With nothing marked selected, default to the first option.
        if not selected and options:
            selected = [options[0]]

        if has_attr(select, 'multiple'):
            fields[name] = [option_value(opt) for opt in selected]
        elif selected:
            # If markup wrongly selects several options of a single
            # select, the last one is the one that ends up selected.
            fields[name] = option_value(selected[-1])
        else:
            fields[name] = ''

    return fields
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment