Created
May 2, 2012 19:53
-
-
Save aslatter/2579752 to your computer and use it in GitHub Desktop.
percent encoding in C
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "percent_encoding.h" | |
#include <string.h> | |
#include <stdio.h> | |
int main() | |
{ | |
char* input = "Hello, world!"; | |
int out_len = max_encoded_buffer_size(strlen(input)); | |
char output[out_len+1]; | |
out_len = percent_encode(input, strlen(input), output, out_len); | |
output[out_len] = 0; | |
printf("Input: %s\n", input); | |
printf("Output: %s\n", output); | |
char round_trip[out_len+1]; | |
out_len = percent_decode(output, out_len, round_trip, out_len); | |
round_trip[out_len] = 0; | |
printf("Decoded: %s\n", round_trip); | |
return 1; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdbool.h> | |
#include <ctype.h> | |
#include <stdint.h> | |
#include "percent_encoding.h" | |
const reserved_set all_reserved_set = | |
{{0,0,0,0,0,0,0,0, | |
0,0,0,0,0,0,0,0}}; | |
const reserved_set conservative_reserved_set = | |
{{0,0,0,0,0,96,255,3, | |
254,255,255,135,254,255,255,71}}; | |
const reserved_set liberal_reserved_set = | |
{{0,0,0,0,218,255,255,175, | |
255,255,255,175,254,255,255,71}}; | |
void mark_unreserved(reserved_set* set, char c) | |
{ | |
if (c > 127) return; | |
int quot = c / 8; | |
uint8_t rem = c % 8; | |
set->set[quot] |= (1 << rem); | |
} | |
void mark_reserved(reserved_set* set, char c) | |
{ | |
if (c > 127) return; | |
int quot = c / 8; | |
int rem = c % 8; | |
set->set[quot] &= ~(1 << rem); | |
} | |
bool is_reserved(const reserved_set* set, char c) | |
{ | |
if (c > 127) return true; | |
int quot = c / 8; | |
int rem = c % 8; | |
uint8_t mask = ~(1 << rem); | |
uint8_t bits = set->set[quot]; | |
return (bits & mask) == bits; | |
} | |
// If you need null termination, you'll need to account for that. We don't. | |
int max_encoded_buffer_size(int decoded_buffer_size) | |
{ | |
return 3 * decoded_buffer_size; | |
} | |
static char hex_encode(uint8_t half_byte) | |
{ | |
static char hex[] = "0123456789abcdef"; | |
return hex[half_byte & 15]; | |
} | |
static char hex_decode_char(uint8_t c) | |
{ | |
return isdigit(c) ? c - '0' : c - 'a' + 10; | |
} | |
static char hex_decode(uint8_t hi, uint8_t lo) | |
{ | |
return (hex_decode_char(hi) << 4) + hex_decode_char(lo); | |
} | |
static bool is_hex_char(int c) | |
{ | |
return (isdigit(c) || (c <= 'f' && c >= 'a')); | |
} | |
// returns number of bytes used in output buffer. | |
// if the output buffer is not big enough, this returns | |
// -1. | |
int percent_encode_custom(const reserved_set *set, char *in_buff, int in_len, char* out_buff, int out_len) | |
{ | |
int in_pos, out_pos; | |
in_pos = out_pos = 0; | |
while(in_pos < in_len) | |
{ | |
char c = in_buff[in_pos++]; | |
if(!is_reserved(set, c)) | |
{ | |
if(out_pos + 1 > out_len) | |
return -1; | |
out_buff[out_pos++] = c; | |
} | |
else | |
{ | |
if(out_pos + 3 > out_len) | |
return -1; | |
out_buff[out_pos++] = '%'; | |
out_buff[out_pos++] = hex_encode(c >> 4); | |
out_buff[out_pos++] = hex_encode(c); | |
} | |
} | |
// return space used in out buffer | |
return out_pos; | |
} | |
int percent_encode(char *in_buff, int in_len, char* out_buff, int out_len) | |
{ | |
return percent_encode_custom(&liberal_reserved_set, in_buff, in_len, out_buff, out_len); | |
} | |
// returns length used in the output buffer. | |
// returns -1 on error, either due to: | |
// - malformed input | |
// - unexpected end-of-buffer for input | |
// - inexpected end-of-buffer for output | |
int percent_decode(char *in_buff, int in_len, char* out_buff, int out_len) | |
{ | |
int in_pos, out_pos; | |
in_pos = out_pos = 0; | |
while(in_pos < in_len) | |
{ | |
if(out_pos >= out_len) | |
return -1; | |
char c = in_buff[in_pos++]; | |
if(c == '%') | |
{ | |
if(!(in_pos + 1 < in_len)) | |
return -1; | |
char h_hi = tolower(in_buff[in_pos++]); | |
char h_lo = tolower(in_buff[in_pos++]); | |
if(!is_hex_char(h_hi) || !is_hex_char(h_lo)) | |
return -1; | |
out_buff[out_pos++] = hex_decode(h_hi, h_lo); | |
} | |
else | |
{ | |
out_buff[out_pos++] = c; | |
} | |
} | |
return out_pos; | |
} | |
/* Tools to produce reserved_set constants | |
void print_reserved_set(FILE* f, reserved_set* set) | |
{ | |
fprintf(f, "Unreserved chars: "); | |
for(int i = 0; i < 128; i++) | |
{ | |
if(!is_reserved(set, i)) | |
{ | |
fprintf(f, "%c", i); | |
} | |
} | |
fprintf(f, "\n"); | |
} | |
void print_set_bytes(FILE* f, reserved_set* set) | |
{ | |
fprintf(f, "{"); | |
for(int i = 0; i < 15; i++) | |
fprintf(f, "%i,", set->set[i]); | |
fprintf(f, "%i}", set->set[15]); | |
} | |
void set_conservative_unreserved(reserved_set *set) | |
{ | |
char c; | |
for(c = 'a'; c <= 'z'; c++) | |
{ | |
mark_unreserved(set, c); | |
mark_unreserved(set, toupper(c)); | |
} | |
for(c = '0'; c <= '9'; c++) | |
mark_unreserved(set, c); | |
mark_unreserved(set, '-'); | |
mark_unreserved(set, '_'); | |
mark_unreserved(set, '.'); | |
mark_unreserved(set, '~'); | |
} | |
void set_liberal_unreserved(reserved_set *set) | |
{ | |
mark_unreserved(set, '!'); | |
mark_unreserved(set, '*'); | |
mark_unreserved(set, '\''); | |
mark_unreserved(set, '('); | |
mark_unreserved(set, ')'); | |
mark_unreserved(set, ';'); | |
mark_unreserved(set, ':'); | |
mark_unreserved(set, '@'); | |
mark_unreserved(set, '&'); | |
mark_unreserved(set, '='); | |
mark_unreserved(set, '+'); | |
mark_unreserved(set, '$'); | |
mark_unreserved(set, ','); | |
mark_unreserved(set, '/'); | |
mark_unreserved(set, '?'); | |
mark_unreserved(set, '#'); | |
mark_unreserved(set, '['); | |
mark_unreserved(set, ']'); | |
} | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef H_PERCENT_ENCODING | |
#define H_PERCENT_ENCODING | |
#include <stdint.h> | |
#include <stdbool.h> | |
typedef struct { | |
uint8_t set[16] | |
} reserved_set; | |
int max_encoded_buffer_size(int decoded_buffer_size); | |
int percent_encode(char *in_buff, int in_len, char* out_buff, int out_len); | |
int percent_encode_custom(const reserved_set *set, char *in_buff, int in_len, char* out_buff, int out_len); | |
int percent_decode(char *in_buff, int in_len, char* out_buff, int out_len); | |
extern const reserved_set all_reserved_set; | |
extern const reserved_set conservative_reserved_set; | |
extern const reserved_set liberal_reserved_set; | |
void mark_unreserved(reserved_set* set, char c); | |
void mark_reserved(reserved_set* set, char c); | |
bool is_reserved(const reserved_set* set, char c); | |
#endif // header |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment