Skip to content

Instantly share code, notes, and snippets.

@rgov
Last active June 14, 2021 21:37
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rgov/ea3151f016aa2c2ce4b0893b07210f11 to your computer and use it in GitHub Desktop.
Save rgov/ea3151f016aa2c2ce4b0893b07210f11 to your computer and use it in GitHub Desktop.
'''
Helps you submit an HTML form like a browser would.

    form = Form(soup.find('form', attrs={'name': 'my_form'}))
    requests.request(
        form.get_method('login'),
        form.get_action('login', relative_to='http://example.com/'),
        data=form.fill('login', {
            'username': 'bob',
            'password': 'hunter2',
        })
    )

The values passed to Form.fill() override default values.

Above, 'login' is the submit button to use. If omitted, the first submit button
(<input type="submit"> or <button type="submit">) is used.

Known issues:
- Cannot uncheck a checkbox whose default state is checked
- Does not find the default selected <option> for a <select>
'''
import urllib.parse
# see https://www.w3.org/TR/html52/sec-forms.html
class Form:
    '''
    Wraps a BeautifulSoup <form> tag so it can be submitted like a browser.

    Attributes:
        form: the wrapped <form> tag.
        fields: maps a control name to a (tag, value) pair, where value is
            the tag's `value` attribute or None when it has none.
        buttons: maps the name of each named submit button to a
            (formaction, formmethod) pair; either item is None when the
            attribute is absent.
    '''

    # <input> types that behave as buttons and are only submitted when they
    # are the button used to submit the form.
    _BUTTON_INPUT_TYPES = ('submit', 'image', 'reset', 'button')
    # <input> types that are only submitted while they are checked.
    _TOGGLE_INPUT_TYPES = ('radio', 'checkbox')

    def __init__(self, form):
        '''
        Index the named controls of `form`.

        Args:
            form: a BeautifulSoup <form> tag.

        Raises:
            ValueError: if `form` is not a <form> tag (for example None,
                because the lookup that produced it found nothing).
        '''
        # Explicit raise instead of `assert`, which is stripped by python -O.
        if getattr(form, 'name', None) != 'form':
            raise ValueError('Form() expects a BeautifulSoup <form> tag')

        self.form = form
        self._action = form.get('action', '')
        self._method = form.get('method', 'GET')
        self.fields = {}
        self.buttons = {}

        for field in form.find_all(('input', 'button', 'select', 'textarea')):
            name = field.get('name')
            if not name:
                continue

            # Several controls may share one name (a radio group, or a hidden
            # fallback followed by a checkbox). An unchecked radio/checkbox is
            # never submitted, so it must not replace a control recorded
            # earlier under the same name.
            if name in self.fields and self._is_unchecked_toggle(field):
                continue

            self.fields[name] = (field, field.get('value'))
            if self._is_submit_button(field):
                self.buttons[name] = (field.get('formaction'),
                                      field.get('formmethod'))

    @staticmethod
    def _type_of(field):
        '''Return the tag's `type` attribute lower-cased, or '' if absent.'''
        # HTML treats the type keyword case-insensitively.
        return (field.get('type') or '').strip().lower()

    @classmethod
    def _is_submit_button(cls, field):
        '''Tell whether `field` is a button that submits the form.'''
        kind = cls._type_of(field)
        if field.name == 'button':
            # A <button> without a type attribute defaults to type="submit".
            return kind in ('', 'submit')
        return field.name == 'input' and kind == 'submit'

    @classmethod
    def _is_toggle(cls, field):
        '''Tell whether `field` is a radio button or a checkbox.'''
        return (field.name == 'input'
                and cls._type_of(field) in cls._TOGGLE_INPUT_TYPES)

    @classmethod
    def _is_unchecked_toggle(cls, field):
        '''Tell whether `field` is a radio/checkbox that is not checked.'''
        return cls._is_toggle(field) and not field.has_attr('checked')

    def _get_default_button(self):
        '''Return the name of the first named submit button, or None.'''
        return next(iter(self.buttons), None)

    def get_action(self, button=None, relative_to=''):
        '''
        Return the URL the form submits to.

        Args:
            button: name of the submit button used; defaults to the first
                named submit button, if there is one.
            relative_to: base URL that the action is resolved against.

        Returns:
            The button's formaction when it has one, otherwise the form's
            action, joined onto `relative_to`.

        Raises:
            KeyError: if `button` is not the name of a known submit button.
        '''
        if button is None:
            button = self._get_default_button()
        # A formaction on the submit button takes precedence over the form's.
        action = None if button is None else self.buttons[button][0]
        return urllib.parse.urljoin(relative_to, action or self._action)

    def get_method(self, button=None):
        '''
        Return the HTTP method the form submits with.

        Args:
            button: name of the submit button used; defaults to the first
                named submit button, if there is one.

        Returns:
            The button's formmethod when it has one, otherwise the form's
            method attribute ('GET' when the form does not declare one).

        Raises:
            KeyError: if `button` is not the name of a known submit button.
        '''
        if button is None:
            button = self._get_default_button()
        method = None if button is None else self.buttons[button][1]
        return method or self._method

    def _fill_impl(self, button, values):
        '''
        Build the name -> value mapping to submit.

        Args:
            button: name of the submit button to include, or None for none.
            values: mapping whose entries override the form's defaults.

        Returns:
            A new dict of the default values overlaid with `values`.
        '''
        filled = {}
        for form_name, (field, default_value) in self.fields.items():
            # Disabled controls are not submitted.
            if field.has_attr('disabled'):
                continue

            # Of all buttons, only the one used to submit is included.
            is_button = field.name == 'button' or (
                field.name == 'input'
                and self._type_of(field) in self._BUTTON_INPUT_TYPES)
            if is_button and form_name != button:
                continue

            # Radio buttons and checkboxes only count while checked.
            is_toggle = self._is_toggle(field)
            if is_toggle and not field.has_attr('checked'):
                continue

            # Fall back to a default when there is no value attribute.
            if default_value is None:
                if is_button:
                    default_value = 'Submit'
                elif is_toggle:
                    default_value = 'on'
                else:
                    default_value = ''
            filled[form_name] = default_value

        # Caller-supplied values win over the defaults.
        filled.update(values)
        return filled

    def fill(self, *args):
        '''
        Return the data to submit, as fill(), fill(values) or
        fill(button, values).

        Args:
            button: optional name of the submit button to use; defaults to
                the first named submit button.
            values: optional mapping of values that override the defaults.

        Returns:
            A dict mapping control names to the values to submit.

        Raises:
            ValueError: if more than two positional arguments are given.
        '''
        if len(args) > 2:
            raise ValueError('Expected fill(values) or fill(button, values)')
        if len(args) == 2:
            button, values = args
        else:
            button = self._get_default_button()
            values = args[0] if args else {}
        return self._fill_impl(button, values)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment