Last active
June 14, 2020 07:36
-
-
Save teusink/132d52260f81ab55baa9 to your computer and use it in GitHub Desktop.
Example coding on input validation for web-applications (do not use blindly in production environments!).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html> | |
<head> | |
<title>Input validation for web-applications, how to process input safely and securely</title> | |
<script> | |
/**
 * Client-side validation of the "testForm" form, used as its onsubmit handler.
 *
 * Reads the six field values, checks that the four required fields are
 * present, validates every field against its format pattern and verifies that
 * both password entries are identical. A status text is written to the
 * element with id "errorMessage".
 *
 * This check is a convenience for the user only; the server must repeat it.
 *
 * @returns {boolean} true when all input is valid (the form is submitted),
 *                    false when the submit has to be cancelled.
 */
function validateForm() {
    // Step 2: store the input in local variables, separate from the source.
    var form = document.forms["testForm"],
        nameField = form["nameField"].value,
        emailField = form["emailField"].value,
        passwordField = form["passwordField"].value,
        repeatPasswordField = form["repeatPasswordField"].value,
        dateField = form["dateField"].value,
        urlField = form["urlField"].value,
        errorMessage = document.getElementById("errorMessage"),
        /* Steps 3-6: no sanitising is done on the client. Every pattern is
           anchored at both ends (exactly like the browser anchors the HTML
           pattern attribute), so over-long values or values that merely
           contain a valid fragment are rejected instead of truncated.
        */
        namePattern = /^.{1,50}$/,
        emailPattern = /^(?:\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}\b)$/,
        passwordPattern = /^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$/,
        datePattern = /^(19|20)\d\d[- \/.](0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])$/,
        urlPattern = /^https?:\/\/.{7,254}$/;

    // True when a field carries no value at all.
    function isBlank(value) {
        return value === null || value === undefined || value === "";
    }

    // The messages are plain text, so textContent is sufficient and avoids
    // any HTML interpretation.
    errorMessage.textContent = "";

    // Required fields must be present before any format check makes sense.
    if (isBlank(nameField) || isBlank(emailField) ||
            isBlank(passwordField) || isBlank(repeatPasswordField)) {
        errorMessage.textContent = "Not all required fields are filled in!";
        return false;
    }

    // The input is invalid as soon as ANY single check fails. Optional
    // fields (date, url) are only checked when they are filled in.
    if (!namePattern.test(nameField) ||
            !emailPattern.test(emailField) ||
            !passwordPattern.test(passwordField) ||
            !passwordPattern.test(repeatPasswordField) ||
            (!isBlank(dateField) && !datePattern.test(dateField)) ||
            (!isBlank(urlField) && !urlPattern.test(urlField)) ||
            passwordField !== repeatPasswordField) {
        errorMessage.textContent = "Not all fields are filled out correctly!";
        return false;
    }

    /* Step 7 (allowlisting of expected content) and step 8 (existence of
       local resources) are not part of this example.
       Step 9: all checks passed, the values may be sent to the server.
    */
    errorMessage.textContent = "All fields are filled in correctly";
    return true;
}
</script> | |
</head> | |
<body>
<!-- The form below demonstrates three declarative checks:
     - required  (whether a field must be filled in)
     - maxlength (the maximum size of the value)
     - pattern   (a regular expression the whole value has to match)
     These are basic input validation checks, of which pattern is the most
     complex one. Not every browser supports pattern, hence the additional
     JavaScript checks in validateForm().
     Inside character classes "-" and "/" are escaped: browsers compile the
     pattern attribute in a Unicode mode where an unescaped "-" or "/" inside
     a character class makes the expression invalid, and an invalid pattern
     is silently ignored.
-->
<p id="errorMessage"></p>
<h1>Form for input values</h1>
<form name="testForm" action="exampleserver.php" onsubmit="return validateForm()" method="post">
    <label for="nameField">Name *</label>
    <input type="text" name="nameField" id="nameField" maxlength="50" placeholder="John Doe" pattern="^.{1,50}$" required>
    <p>Full or partial name or nickname, maximum of 50 characters</p>
    <label for="emailField">Email address *</label>
    <input type="email" name="emailField" id="emailField" maxlength="254" placeholder="john@doe.com" pattern="\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,4}\b" required>
    <p>Must be in format of john@doe.com</p>
    <label for="passwordField">Password *</label>
    <input type="password" name="passwordField" id="passwordField" maxlength="16" pattern="^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$" required>
    <p>Requires at least one lower and one upper case letter, one digit, no spaces and a length of 8 to 16</p>
    <label for="repeatPasswordField">Repeat password *</label>
    <input type="password" name="repeatPasswordField" id="repeatPasswordField" maxlength="16" pattern="^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$" required>
    <p>Must be the same password as the one typed above</p>
    <label for="dateField">Birthdate</label>
    <input type="date" name="dateField" id="dateField" maxlength="10" placeholder="2000-12-31" pattern="^(19|20)\d\d[\- \/.](0[1-9]|1[012])[\- \/.](0[1-9]|[12][0-9]|3[01])$">
    <p>Must be in format of yyyy-mm-dd</p>
    <label for="urlField">Personal website</label>
    <input type="url" name="urlField" id="urlField" maxlength="254" placeholder="http://www.johndoe.com" pattern="(^http://.{7,254})|(^https://.{7,254})">
    <p>URL must start with http:// or https://</p>
    <input type="submit" value="Submit">
    <p>All fields with an asterisk (*) are required</p>
</form>
</body>
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html> | |
<head> | |
<title>Input validation for web-applications, how to process input safely and securely</title> | |
</head> | |
<body> | |
<?php | |
/**
 * Removes all HTML tags, comments and invisible content from a string.
 *
 * PHP's strip_tags() removes tags, but keeps the text between <script>,
 * <style> and similar elements. Those elements are therefore removed
 * together with their content first. A line break is inserted in front of
 * block-level tags so that neighbouring words are not joined once the tags
 * are gone.
 *
 * @param mixed $text Raw input. Non-scalar input (for example an array sent
 *                    as "field[]=x") is treated as empty.
 * @return string The text without tags. An empty string is returned when the
 *                input cannot be processed, e.g. because it is not valid
 *                UTF-8 (the patterns use the /u modifier).
 */
function strip_html_tags($text) {
    if (!is_scalar($text)) {
        return '';
    }
    // Elements whose complete content is invisible or executable.
    $invisiblePatterns = array(
        '@<head[^>]*?>.*?</head>@siu',
        '@<style[^>]*?>.*?</style>@siu',
        '@<script[^>]*?.*?</script>@siu',
        '@<object[^>]*?.*?</object>@siu',
        '@<embed[^>]*?.*?</embed>@siu',
        '@<applet[^>]*?.*?</applet>@siu',
        '@<noframes[^>]*?.*?</noframes>@siu',
        '@<noscript[^>]*?.*?</noscript>@siu',
        '@<noembed[^>]*?.*?</noembed>@siu',
    );
    // Opening/closing block-level tags that get a preceding line break.
    $blockPatterns = array(
        '@<((br)|(hr))@iu',
        '@</?((address)|(blockquote)|(center)|(del))@iu',
        '@</?((div)|(h[1-9])|(ins)|(isindex)|(p)|(pre))@iu',
        '@</?((dir)|(dl)|(dt)|(dd)|(li)|(menu)|(ol)|(ul))@iu',
        '@</?((table)|(th)|(td)|(caption))@iu',
        '@</?((form)|(button)|(fieldset)|(legend)|(input))@iu',
        '@</?((label)|(select)|(optgroup)|(option)|(textarea))@iu',
        '@</?((frameset)|(frame)|(iframe))@iu',
    );
    // Patterns in an array are applied one after another, so two calls with
    // a single replacement each are equivalent to one call with a positional
    // replacement array, without the need to keep two lists aligned.
    $text = preg_replace($invisiblePatterns, ' ', (string) $text);
    if ($text !== null) {
        $text = preg_replace($blockPatterns, "\n\$0", $text);
    }
    // preg_replace() yields NULL on failure (e.g. malformed UTF-8).
    if ($text === null) {
        return '';
    }
    // Remove all remaining tags and comments and return.
    return strip_tags($text);
}
/**
 * Truncates a string to at most $limit bytes, preferably at a word boundary.
 *
 * When the text is longer than $limit it is cut at the last whitespace
 * character (space, tab, CR or LF) that still keeps the result within the
 * limit. When there is no such whitespace the text is cut hard at $limit, so
 * the limit is always enforced. The result is trimmed before $ellipsis is
 * appended; the ellipsis itself does not count towards the limit.
 *
 * Lengths are counted in bytes (strlen/substr), so a hard cut may split a
 * multi-byte character.
 *
 * @param string $text     Text to truncate.
 * @param int    $limit    Maximum length in bytes; negative values count as 0.
 * @param string $ellipsis Optional suffix (e.g. '...') added to truncated text.
 * @return string The unchanged text when it fits, the truncated text otherwise.
 */
function truncate_chars($text, $limit, $ellipsis = '') {
    $text = (string) $text;
    $limit = max(0, (int) $limit);
    if (strlen($text) <= $limit) {
        return $text;
    }
    // Look one byte past the limit: whitespace at that position means the
    // first $limit bytes end exactly on a word boundary.
    $head = substr($text, 0, $limit + 1);
    // strtr() maps byte for byte, so offsets stay aligned with $text.
    $boundary = strrpos(strtr($head, "\r\n\t", '   '), ' ');
    if ($boundary === false || $boundary === 0) {
        // No usable word boundary: enforce the limit with a hard cut.
        $boundary = $limit;
    }
    return trim(substr($text, 0, $boundary)) . $ellipsis;
}
/* Step 1: Check if the input is actually sent and received
   Every required field has to be present and has to be a plain string
   (a parameter sent as "field[]=x" arrives as an array and is refused).
*/
$requiredFields = array('nameField', 'emailField', 'passwordField', 'repeatPasswordField');
$allReceived = true;
foreach ($requiredFields as $requiredField) {
    if (!isset($_POST[$requiredField]) || !is_string($_POST[$requiredField])) {
        $allReceived = false;
        break;
    }
}
if ($allReceived) {
    /* Step 2: Store input in memory, separate it from the source
       Step 3: Check variable for, and remove all scripting
       All kinds of script tags are removed by strip_html_tags().
       The passwords are deliberately NOT sanitised: stripping, trimming or
       truncating would silently change the secret the user chose. They are
       only validated (step 6) and never written to the page.
    */
    $nameField = strip_html_tags($_POST['nameField']);
    $emailField = strip_html_tags($_POST['emailField']);
    $passwordField = $_POST['passwordField'];
    $repeatPasswordField = $_POST['repeatPasswordField'];
    // The non-required fields default to an empty string.
    $dateField = (isset($_POST['dateField']) && is_string($_POST['dateField']))
        ? strip_html_tags($_POST['dateField']) : "";
    $urlField = (isset($_POST['urlField']) && is_string($_POST['urlField']))
        ? strip_html_tags($_POST['urlField']) : "";
    /* Step 4: Trim the variable
       Remove all trailing and preceding spaces from input.
    */
    $nameField = trim($nameField);
    $emailField = trim($emailField);
    $dateField = trim($dateField);
    $urlField = trim($urlField);
    /* Step 5: Truncate the variable to the maximum size of expected value */
    $nameField = truncate_chars($nameField, 50);
    $emailField = truncate_chars($emailField, 254);
    $dateField = truncate_chars($dateField, 10);
    $urlField = truncate_chars($urlField, 254);
    /* Step 6: Check if it is the correct variable type and/or format
       All patterns are anchored at both ends, so a value that merely
       contains a valid fragment is rejected. The passwords are compared
       with === to rule out PHP's loose string comparison.
    */
    if (preg_match("/^.{1,50}$/", $nameField) &&
        preg_match("/^(?:\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}\b)$/", $emailField) &&
        preg_match("/^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$/", $passwordField) &&
        preg_match("/^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$/", $repeatPasswordField) &&
        (preg_match("/^(19|20)\d\d[- \/.](0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])$/", $dateField) || $dateField === "") &&
        (preg_match("/^https?:\/\/.{7,254}$/", $urlField) || $urlField === "") &&
        $passwordField === $repeatPasswordField) {
        // Step 6 repeated: sanitise with the dedicated PHP filters.
        $emailField = filter_var($emailField, FILTER_SANITIZE_EMAIL);
        $urlField = filter_var($urlField, FILTER_SANITIZE_URL);
        /*
        Step 7: Check if it is expected content (also called allowlisting)
        Not relevant in this example.
        Step 8: When relevant, check existence of local resources
        Not relevant in this example.
        Step 9: And now is it input for the process
        The values are written to the page. Output is always HTML-escaped,
        independent of the input cleaning above.
        */
        $escape = function ($value) {
            return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
        };
        ?><h1>Received values</h1>
<p><?php
        echo 'Name: '.$escape($nameField).'<br />';
        echo 'Email: '.$escape($emailField).'<br />';
        // Passwords are never sent back to the browser.
        echo 'Password: (received, not displayed)<br />';
        echo 'Password repeated: (received, not displayed)<br />';
        echo 'Birthdate: '.$escape($dateField).'<br />';
        echo 'Website: '.$escape($urlField).'<br />';
        ?></p><?php
    } else {
        ?><p>Not all fields are in proper format or passwords do not match.</p><?php
    }
} else {
    ?><p>Not all required fields are received.</p><?php
}
?> | |
</body> | |
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<body>
<!-- The form below demonstrates three declarative checks:
     - required  (whether a field must be filled in)
     - maxlength (the maximum size of the value)
     - pattern   (a regular expression the whole value has to match)
     These are basic input validation checks, of which pattern is the most
     complex one. Not every browser supports pattern, hence the additional
     JavaScript checks in validateForm().
     Inside character classes "-" and "/" are escaped: browsers compile the
     pattern attribute in a Unicode mode where an unescaped "-" or "/" inside
     a character class makes the expression invalid, and an invalid pattern
     is silently ignored.
-->
<p id="errorMessage"></p>
<h1>Form for input values</h1>
<form name="testForm" action="exampleserver.php" onsubmit="return validateForm()" method="post">
    <label for="nameField">Name *</label>
    <input type="text" name="nameField" id="nameField" maxlength="50" placeholder="John Doe" pattern="^.{1,50}$" required>
    <p>Full or partial name or nickname, maximum of 50 characters</p>
    <label for="emailField">Email address *</label>
    <input type="email" name="emailField" id="emailField" maxlength="254" placeholder="john@doe.com" pattern="\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,4}\b" required>
    <p>Must be in format of john@doe.com</p>
    <label for="passwordField">Password *</label>
    <input type="password" name="passwordField" id="passwordField" maxlength="16" pattern="^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$" required>
    <p>Requires at least one lower and one upper case letter, one digit, no spaces and a length of 8 to 16</p>
    <label for="repeatPasswordField">Repeat password *</label>
    <input type="password" name="repeatPasswordField" id="repeatPasswordField" maxlength="16" pattern="^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$" required>
    <p>Must be the same password as the one typed above</p>
    <label for="dateField">Birthdate</label>
    <input type="date" name="dateField" id="dateField" maxlength="10" placeholder="2000-12-31" pattern="^(19|20)\d\d[\- \/.](0[1-9]|1[012])[\- \/.](0[1-9]|[12][0-9]|3[01])$">
    <p>Must be in format of yyyy-mm-dd</p>
    <label for="urlField">Personal website</label>
    <input type="url" name="urlField" id="urlField" maxlength="254" placeholder="http://www.johndoe.com" pattern="(^http://.{7,254})|(^https://.{7,254})">
    <p>URL must start with http:// or https://</p>
    <input type="submit" value="Submit">
    <p>All fields with an asterisk (*) are required</p>
</form>
</body>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Client-side validation of the "testForm" form, used as its onsubmit handler.
 *
 * Reads the six field values, checks that the four required fields are
 * present, validates every field against its format pattern and verifies that
 * both password entries are identical. A status text is written to the
 * element with id "errorMessage".
 *
 * This check is a convenience for the user only; the server must repeat it.
 *
 * @returns {boolean} true when all input is valid (the form is submitted),
 *                    false when the submit has to be cancelled.
 */
function validateForm() {
    // Step 2: store the input in local variables, separate from the source.
    var form = document.forms["testForm"],
        nameField = form["nameField"].value,
        emailField = form["emailField"].value,
        passwordField = form["passwordField"].value,
        repeatPasswordField = form["repeatPasswordField"].value,
        dateField = form["dateField"].value,
        urlField = form["urlField"].value,
        errorMessage = document.getElementById("errorMessage"),
        /* Steps 3-6: no sanitising is done on the client. Every pattern is
           anchored at both ends (exactly like the browser anchors the HTML
           pattern attribute), so over-long values or values that merely
           contain a valid fragment are rejected instead of truncated.
        */
        namePattern = /^.{1,50}$/,
        emailPattern = /^(?:\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}\b)$/,
        passwordPattern = /^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{8,16}$/,
        datePattern = /^(19|20)\d\d[- \/.](0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])$/,
        urlPattern = /^https?:\/\/.{7,254}$/;

    // True when a field carries no value at all.
    function isBlank(value) {
        return value === null || value === undefined || value === "";
    }

    // The messages are plain text, so textContent is sufficient and avoids
    // any HTML interpretation.
    errorMessage.textContent = "";

    // Required fields must be present before any format check makes sense.
    if (isBlank(nameField) || isBlank(emailField) ||
            isBlank(passwordField) || isBlank(repeatPasswordField)) {
        errorMessage.textContent = "Not all required fields are filled in!";
        return false;
    }

    // The input is invalid as soon as ANY single check fails. Optional
    // fields (date, url) are only checked when they are filled in.
    if (!namePattern.test(nameField) ||
            !emailPattern.test(emailField) ||
            !passwordPattern.test(passwordField) ||
            !passwordPattern.test(repeatPasswordField) ||
            (!isBlank(dateField) && !datePattern.test(dateField)) ||
            (!isBlank(urlField) && !urlPattern.test(urlField)) ||
            passwordField !== repeatPasswordField) {
        errorMessage.textContent = "Not all fields are filled out correctly!";
        return false;
    }

    /* Step 7 (allowlisting of expected content) and step 8 (existence of
       local resources) are not part of this example.
       Step 9: all checks passed, the values may be sent to the server.
    */
    errorMessage.textContent = "All fields are filled in correctly";
    return true;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment