Skip to content

Instantly share code, notes, and snippets.

@teusink
Last active June 14, 2020 07:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save teusink/132d52260f81ab55baa9 to your computer and use it in GitHub Desktop.
Save teusink/132d52260f81ab55baa9 to your computer and use it in GitHub Desktop.
Example coding on input validation for web-applications (do not use blindly in production environments!).
<html>
<head>
<title>Input validation for web-applications, how to process input safely and securely</title>
<script>
function validateForm() {
/* Step 1: Check if the input is actually sent and received
This step is only done when sending data from the client to the server.
So it holds no relevance here.
Step 2: Store input in memory, separate it from the source
Here we store all input in variables.
*/
var nameField = document.forms["testForm"]["nameField"].value,
emailField = document.forms["testForm"]["emailField"].value,
passwordField = document.forms["testForm"]["passwordField"].value,
repeatPasswordField = document.forms["testForm"]["repeatPasswordField"].value,
dateField = document.forms["testForm"]["dateField"].value,
urlField = document.forms["testForm"]["urlField"].value,
// To modify the error message on screen
formError = true,
errorMessage = document.getElementById("errorMessage"),
/* In the patterns below we check for the following steps.
Step 3: Check variable for, and remove all scripting
Besides using regular expression, we do not check for scripting
here. For example, the nameField element is just being check
if it does not exceed the 50 characters limit.
Step 4: Trim the variable
All regular expressions in this example have size limits.
Step 5: Truncate the variable to the maximum size of expected value
We do not use this step here. When the variable is to long, the
expression will fail and return an error.
Step 6: Check if it is the correct variable type and/or format
This step is done here (correct date, email and url format)
*/
namePattern = new RegExp(/^.{1,50}$/),
emailPattern = new RegExp(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}\b/),
passwordPattern = new RegExp(/^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$/),
datePattern = new RegExp(/^(19|20)\d\d[- \/.](0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])$/),
urlPattern = new RegExp(/(^http:\/\/.{7,254})|(^https:\/\/.{7,254})/);
errorMessage.innerHTML = "";
/* In this step we are going to get all the values from the forms.
We are going to test if there is content in them,
and in case of the required fields, we are going to require
the input.
*/
if (nameField === null || nameField === "" ||
emailField === null || emailField === "" ||
passwordField === null || passwordField === "" ||
repeatPasswordField === null || repeatPasswordField === "") {
errorMessage.innerHTML = "Not all required fields are filled in!";
formError = false;
} else {
/* Here we actually check if all input values matches the patterns
and if the passwords are equal to each other or not.
*/
if (namePattern.test(nameField) !== true &&
emailPattern.test(emailField) !== true &&
passwordPattern.test(passwordField) !== true &&
passwordPattern.test(repeatPasswordField) !== true &&
(datePattern.test(dateField) !== true || dateField === null || dateField === "") &&
(urlPattern.test(urlField) !== true || urlField === null || urlField === "") &&
passwordField === repeatPasswordField) {
errorMessage.innerHTML = "Not all fields are filled out correctly!";
formError = false;
} else {
errorMessage.innerHTML = "All fields are filled in correctly";
formError = true;
}
/*
Step 7: Check if it is expected content (also called allowlisting)
This is not done in this example.
Step 8: When relevant, check existence of local resources
This is not done in this example (not relevant).
*/
}
/* Step 9: And now is it input for the process
When formError becomes true, then all checks are done
and the outcome is valid. The input values are now
send to the server.
*/
return formError;
}
</script>
</head>
<body>
<!-- In the example below you will see the use of:
- required (whether a field is required or not)
- maxlength (what the maximum size of the value may be)
- pattern (regular expression)
These are basic checks concerning input validation, where
as the pattern is the most complex. Pattern is relatively new
and might not be supported on all browsers. Hence the extra
javascript checks that needs to be done also.
-->
<p id="errorMessage"></p>
<h1>Form for input values</h1>
<form name="testForm" action="exampleserver.php" onsubmit="return validateForm()" method="post">
<label for="nameField">Name *</label>
<input type="text" name="nameField" id="nameField" maxlength="50" placeholder="John Doe" pattern="^.{1,50}$" required>
<p>Full or partial name or nickname, maximum of 50 characters</p>
<label for="emailField">Email address *</label>
<input type="email" name="emailField" id="emailField" maxlength="254" placeholder="john@doe.com" pattern="\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}\b" required>
<p>Must be in format of john@doe.com</p>
<label for="passwordField">Password *</label>
<input type="password" name="passwordField" id="passwordField" maxlength="16" pattern="^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$" required>
<p>Requires at least one lower and one upper case letter, one digit, no spaces and a length of 8 to 16</p>
<label for="repeatPasswordField">Password *</label>
<input type="password" name="repeatPasswordField" id="repeatPasswordField" maxlength="16" pattern="^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$" required>
<p>Must be the same password as the one typed above</p>
<label for="dateField">Birthdate</label>
<input type="date" name="dateField" id="dateField" maxlength="10" placeholder="2000/12/31" pattern="^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$">
<p>Must be in format of mm/dd/yyyy</p>
<label for="urlField">Personal website</label>
<input type="url" name="urlField" id="urlField" maxlength="254" placeholder="http://www.johndoe.com" pattern="(^http://.{7,254})|(^https://.{7,254})">
<p>URL most start with http:// or https://</p>
<input type="submit" value="Submit">
<p>All fields with an asterisk (*) are required</p>
</form>
</body>
</html>
<html>
<head>
<title>Input validation for web-applications, how to process input safely and securely</title>
</head>
<body>
<?php
// Function to clean input from al its tags
function strip_html_tags($text) {
// PHP's strip_tags() function will remove tags, but it
// doesn't remove scripts, styles, and other unwanted
// invisible text between tags. Also, as a prelude to
// tokenizing the text, we need to insure that when
// block-level tags (such as <p> or <div>) are removed,
// neighboring words aren't joined.
$text = preg_replace(
array(
// Remove invisible content
'@<head[^>]*?>.*?</head>@siu',
'@<style[^>]*?>.*?</style>@siu',
'@<script[^>]*?.*?</script>@siu',
'@<object[^>]*?.*?</object>@siu',
'@<embed[^>]*?.*?</embed>@siu',
'@<applet[^>]*?.*?</applet>@siu',
'@<noframes[^>]*?.*?</noframes>@siu',
'@<noscript[^>]*?.*?</noscript>@siu',
'@<noembed[^>]*?.*?</noembed>@siu',
// Add line breaks before & after blocks
'@<((br)|(hr))@iu',
'@</?((address)|(blockquote)|(center)|(del))@iu',
'@</?((div)|(h[1-9])|(ins)|(isindex)|(p)|(pre))@iu',
'@</?((dir)|(dl)|(dt)|(dd)|(li)|(menu)|(ol)|(ul))@iu',
'@</?((table)|(th)|(td)|(caption))@iu',
'@</?((form)|(button)|(fieldset)|(legend)|(input))@iu',
'@</?((label)|(select)|(optgroup)|(option)|(textarea))@iu',
'@</?((frameset)|(frame)|(iframe))@iu',
),
array(
' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
"\n\$0", "\n\$0", "\n\$0", "\n\$0", "\n\$0", "\n\$0",
"\n\$0", "\n\$0",
),
$text);
// Remove all remaining tags and comments and return.
return strip_tags($text);
}
// Function to truncate input to a maximum number of chars
function truncate_chars($text, $limit, $ellipsis = '') { // $ellipsis if you want have trailing dots (...) or something like that
if( strlen($text) > $limit ) {
$endpos = strpos(str_replace(array("\r\n", "\r", "\n", "\t"), ' ', $text), ' ', $limit);
if($endpos !== FALSE)
$text = trim(substr($text, 0, $endpos)) . $ellipsis;
}
return $text;
}
/* Step 1: Check if the input is actually sent and received
With isset we can check if there is input, before grabbing it,
to prevent a null error.
*/
if (isset($_POST['nameField']) &&
isset($_POST['emailField']) &&
isset($_POST['passwordField']) &&
isset($_POST['repeatPasswordField'])) { // (required fields first)
/* Step 2: Store input in memory, separate it from the source
Remove scripts from input and save it in a separate variable.
Step 3: Check variable for, and remove all scripting
All kinds of script tags are removed from the code. See more
details in the function above this code.
*/
$nameField = strip_html_tags($_POST['nameField']);
$emailField = strip_html_tags($_POST['emailField']);
$passwordField = strip_html_tags($_POST['passwordField']);
$repeatPasswordField = strip_html_tags($_POST['repeatPasswordField']);
if (isset($_POST['dateField'])) { // (and here the non-required fields)
$dateField = strip_html_tags($_POST['dateField']);
} else {
$dateField = "";
}
if (isset($_POST['urlField'])) { // (and here the non-required fields)
$urlField = strip_html_tags($_POST['urlField']);
} else {
$urlField = "";
}
/* Step 4: Trim the variable
Remove all trailing and preceding spaces from input (no use in keeping them).
You can skip this code if you do not want sanitization, we do a regular
expression match in the code later.
*/
$nameField = trim($nameField);
$emailField = trim($emailField);
$passwordField = trim($passwordField);
$repeatPasswordField = trim($repeatPasswordField);
$dateField = trim($dateField);
$urlField = trim($urlField);
/* Step 5: Truncate the variable to the maximum size of expected value
Break the variable to its maximum designated size to make sure that you
don't create a buffer overflow.
You can skip this code if you do not want sanitization, we do a regular
expression match in the code later.
*/
$nameField = truncate_chars($nameField, 50);
$emailField = truncate_chars($emailField, 254);
$passwordField = truncate_chars($passwordField, 16);
$repeatPasswordField = truncate_chars($repeatPasswordField, 16);
$dateField = truncate_chars($dateField, 10);
$urlField = truncate_chars($urlField, 254);
/* Now we are going to repeat steps 4 and 5 and do step 6.
If you don't want to do the steps 4 and 5 above, you for sure need to
do them below!
Step 6: Check if it is the correct variable type and/or format
Checking for correct data (format) in input.
*/
if (preg_match("/^.{1,50}$/", $nameField) &&
preg_match("/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}\b/", $emailField) &&
preg_match("/^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$/", $passwordField) &&
preg_match("/^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$/", $repeatPasswordField) &&
(preg_match("/^(19|20)\d\d[- \/.](0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])$/", $dateField) || $dateField == "") &&
(preg_match("/(^http:\/\/.{7,254})|(^https:\/\/.{7,254})/", $urlField) || $urlField == "") &&
$passwordField == $repeatPasswordField) {
// And we repeat step 6 here, by sanitizing the variables by using a PHP function
$emailField = filter_var($emailField, FILTER_SANITIZE_EMAIL);
$urlField = filter_var($urlField, FILTER_SANITIZE_URL);
/*
Step 7: Check if it is expected content (also called allowlisting)
Not relevant in this example.
Step 8: When relevant, check existence of local resources
Not relevant in this example.
Step 9: And now is it input for the process
Yes, all done and all well. We can now safely echo the values in oblivion!
*/
?><h1>Received values</h1>
<p><?php
echo 'Name: '.$nameField.'<br />';
echo 'Email: '.$emailField.'<br />';
echo 'Password: '.$passwordField.'<br />';
echo 'Password repeated: '.$repeatPasswordField.'<br />';
echo 'Birthdate: '.$dateField.'<br />';
echo 'Website: '.$urlField.'<br />';
?></p><?php
} else {
?><p>Not all fields are in proper format or passwords do not match.</p><?php
}
} else {
?><p>Not all required fields are received.</p><?php
}
?>
</body>
</html>
<body>
<!-- In the example below you will see the use of:
- required (whether a field is required or not)
- maxlength (what the maximum size of the value may be)
- pattern (regular expression)
These are basic checks concerning input validation, where
as the pattern is the most complex. Pattern is relatively new
and might not be supported on all browsers. Hence the extra
javascript checks that needs to be done also.
-->
<p id="errorMessage"></p>
<h1>Form for input values</h1>
<form name="testForm" action="exampleserver.php" onsubmit="return validateForm()" method="post">
<label for="nameField">Name *</label>
<input type="text" name="nameField" id="nameField" maxlength="50" placeholder="John Doe" pattern="^.{1,50}$" required>
<p>Full or partial name or nickname, maximum of 50 characters</p>
<label for="emailField">Email address *</label>
<input type="email" name="emailField" id="emailField" maxlength="254" placeholder="john@doe.com" pattern="\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}\b" required>
<p>Must be in format of john@doe.com</p>
<label for="passwordField">Password *</label>
<input type="password" name="passwordField" id="passwordField" maxlength="16" pattern="^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$" required>
<p>Requires at least one lower and one upper case letter, one digit, no spaces and a length of 8 to 16</p>
<label for="repeatPasswordField">Password *</label>
<input type="password" name="repeatPasswordField" id="repeatPasswordField" maxlength="16" pattern="^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$" required>
<p>Must be the same password as the one typed above</p>
<label for="dateField">Birthdate</label>
<input type="date" name="dateField" id="dateField" maxlength="10" placeholder="2000/12/31" pattern="^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$">
<p>Must be in format of mm/dd/yyyy</p>
<label for="urlField">Personal website</label>
<input type="url" name="urlField" id="urlField" maxlength="254" placeholder="http://www.johndoe.com" pattern="(^http://.{7,254})|(^https://.{7,254})">
<p>URL most start with http:// or https://</p>
<input type="submit" value="Submit">
<p>All fields with an asterisk (*) are required</p>
</form>
</body>
function validateForm() {
/* Step 1: Check if the input is actually sent and received
This step is only done when sending data from the client to the server.
So it holds no relevance here.
Step 2: Store input in memory, separate it from the source
Here we store all input in variables.
*/
var nameField = document.forms["testForm"]["nameField"].value,
emailField = document.forms["testForm"]["emailField"].value,
passwordField = document.forms["testForm"]["passwordField"].value,
repeatPasswordField = document.forms["testForm"]["repeatPasswordField"].value,
dateField = document.forms["testForm"]["dateField"].value,
urlField = document.forms["testForm"]["urlField"].value,
// To modify the error message on screen
formError = true,
errorMessage = document.getElementById("errorMessage"),
/* In the patterns below we check for the following steps.
Step 3: Check variable for, and remove all scripting
Besides using regular expression, we do not check for scripting
here. For example, the nameField element is just being check
if it does not exceed the 50 characters limit.
Step 4: Trim the variable
All regular expressions in this example have size limits.
Step 5: Truncate the variable to the maximum size of expected value
We do not use this step here. When the variable is to long, the
expression will fail and return an error.
Step 6: Check if it is the correct variable type and/or format
This step is done here (correct date, email and url format)
*/
namePattern = new RegExp(/^.{1,50}$/),
emailPattern = new RegExp(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}\b/),
passwordPattern = new RegExp(/^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$/),
datePattern = new RegExp(/^(19|20)\d\d[- \/.](0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])$/),
urlPattern = new RegExp(/(^http:\/\/.{7,254})|(^https:\/\/.{7,254})/);
errorMessage.innerHTML = "";
/* In this step we are going to get all the values from the forms.
We are going to test if there is content in them,
and in case of the required fields, we are going to require
the input.
*/
if (nameField === null || nameField === "" ||
emailField === null || emailField === "" ||
passwordField === null || passwordField === "" ||
repeatPasswordField === null || repeatPasswordField === "") {
errorMessage.innerHTML = "Not all required fields are filled in!";
formError = false;
} else {
/* Here we actually check if all input values matches the patterns
and if the passwords are equal to each other or not.
*/
if (namePattern.test(nameField) !== true &&
emailPattern.test(emailField) !== true &&
passwordPattern.test(passwordField) !== true &&
passwordPattern.test(repeatPasswordField) !== true &&
(datePattern.test(dateField) !== true || dateField === null || dateField === "") &&
(urlPattern.test(urlField) !== true || urlField === null || urlField === "") &&
passwordField === repeatPasswordField) {
errorMessage.innerHTML = "Not all fields are filled out correctly!";
formError = false;
} else {
errorMessage.innerHTML = "All fields are filled in correctly";
formError = true;
}
/*
Step 7: Check if it is expected content (also called allowlisting)
This is not done in this example.
Step 8: When relevant, check existence of local resources
This is not done in this example (not relevant).
*/
}
/* Step 9: And now is it input for the process
When formError becomes true, then all checks are done
and the outcome is valid. The input values are now
send to the server.
*/
return formError;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment