Skip to content

Instantly share code, notes, and snippets.

Created September 17, 2013 12:38
Show Gist options
  • Save wqweto/6593791 to your computer and use it in GitHub Desktop.
Save wqweto/6593791 to your computer and use it in GitHub Desktop.
//Convert all unsafe characters in szStringIn to "%XX" escape sequences and
//write the result to szStringOut.
//szStringIn and szStringOut must be different strings (enforced via ATLENSURE).
//
//Parameters:
//  szStringIn  - NUL-terminated input string.
//  szStringOut - destination buffer holding dwMaxLength chars.
//  pdwStrLen   - optional; receives the number of output chars counted plus
//                one for the terminator. The count keeps running after the
//                buffer is full, so the caller can learn the size to allocate.
//  dwMaxLength - capacity of szStringOut in chars, terminator included.
//  dwFlags     - ATL_URL_* flags; each branch below states which flag drives it.
//
//Returns TRUE when the escaped string and its terminator fit in szStringOut.
//Returns FALSE when the buffer is too small or when a '%' sequence being
//decoded (ATL_URL_DECODE) contains a non-hex digit.
//
//NOTE(review): the text this block was reviewed from had lost its block
//delimiters, the declaration of bRet, the early exits on malformed/truncated
//'%' sequences, the lowercasing call on the scheme, the 'continue' after the
//meta-sequence helper and the per-character dwLen increment. They are
//reconstructed here - confirm against the upstream ATL header.
inline _Success_(return != FALSE) BOOL AtlEscapeUrl(
    _In_z_ LPCSTR szStringIn,
    _Out_writes_to_(dwMaxLength, *pdwStrLen) LPSTR szStringOut,
    _Out_opt_ DWORD* pdwStrLen,
    _In_ DWORD dwMaxLength,
    _In_ DWORD dwFlags = 0)
{
    ATLENSURE( szStringIn != NULL );
    ATLENSURE( szStringOut != NULL );
    ATLENSURE( szStringIn != szStringOut );

    char ch;
    DWORD dwLen = 0;
    BOOL bRet = TRUE;
    BOOL bSchemeFile = FALSE;
    DWORD dwColonPos = 0;
    //Working copy of the flags: ATL_URL_NO_ENCODE may be switched on mid-string
    DWORD dwFlagsInternal = dwFlags;
    //The next 2 are for buffer security checks
    LPSTR szOrigStringOut = szStringOut;
    LPSTR szStringOutEnd = (szStringOut + dwMaxLength);

    while ((ch = *szStringIn++) != '\0')
    {
        //if we are at the maximum length, set bRet to FALSE
        //this ensures no more data is written to szStringOut, but
        //the length of the string is still updated, so the user
        //knows how much space to allocate
        if (dwLen == dwMaxLength)
        {
            bRet = FALSE;
        }

        //Keep track of the first ':' position to match the weird way
        //InternetCanonicalizeUrl handles it
        if (ch == ':' && (dwFlagsInternal & ATL_URL_CANONICALIZE) && !dwColonPos)
        {
            if (bRet)
            {
                //Terminate what has been written so far so that the scheme
                //can be treated as a string
                *szStringOut = '\0';
                LPSTR pszStrToLower = szStringOut - dwLen;
                ATLENSURE(pszStrToLower >= szOrigStringOut && pszStrToLower <= szStringOutEnd);
                //Lowercase the scheme; the "file" comparison below is case-sensitive
                Checked::strlwr_s(pszStrToLower, szStringOutEnd - pszStrToLower);

                if (dwLen == 4 && !strncmp("file", (szStringOut - 4), 4))
                {
                    bSchemeFile = TRUE;
                }
            }

            //Stored 1-based so that zero keeps meaning "no colon seen yet"
            dwColonPos = dwLen + 1;
        }
        else if (ch == '%' && (dwFlagsInternal & ATL_URL_DECODE))
        {
            //decode the escaped sequence
            if (*szStringIn != '\0')
            {
                short nFirstDigit = AtlHexValue(*szStringIn++);
                if (nFirstDigit < 0)
                {
                    //not a hex digit: fail and stop processing
                    bRet = FALSE;
                    break;
                }
                ch = static_cast<char>(16 * nFirstDigit);

                if (*szStringIn != '\0')
                {
                    short nSecondDigit = AtlHexValue(*szStringIn++);
                    if (nSecondDigit < 0)
                    {
                        bRet = FALSE;
                        break;
                    }
                    ch = static_cast<char>(ch + nSecondDigit);
                }
                else
                {
                    //input ended in the middle of the sequence
                    break;
                }
            }
            else
            {
                //input ended right after the '%'
                break;
            }
        }
        else if ((ch == '?' || ch == '#') && (dwFlagsInternal & ATL_URL_BROWSER_MODE))
        {
            //ATL_URL_BROWSER mode does not encode after a '?' or a '#'
            dwFlagsInternal |= ATL_URL_NO_ENCODE;
        }

        if ((dwFlagsInternal & ATL_URL_CANONICALIZE) && (dwFlagsInternal & ATL_URL_NO_ENCODE) == 0)
        {
            //canonicalize the '\' to '/'
            if (ch == '\\' && (dwColonPos || (dwFlagsInternal & ATL_URL_COMBINE)) && bRet)
            {
                //if the scheme is not file or it is file and the '\' is in "file:\\"
                //NOTE: This is to match the way InternetCanonicalizeUrl handles this case
                if (!bSchemeFile || (dwLen < 7))
                {
                    ch = '/';
                }
            }
            else if (ch == '.' && dwLen > 0 && (dwFlagsInternal & ATL_URL_NO_META) == 0)
            {
                //if we are escaping meta sequences, attempt to do so
                //(the helper receives the input/output cursors and dwLen by
                //address; when it reports success this character is done)
                if (AtlEscapeUrlMetaHelper(&szStringOut, szStringOut - 1, dwLen,
                        const_cast<char**>(&szStringIn), &dwLen, dwFlagsInternal, dwColonPos))
                {
                    continue;
                }
            }
        }

        //if we are encoding and it is an unsafe character
        if (AtlIsUnsafeUrlChar(ch) && (dwFlagsInternal & ATL_URL_NO_ENCODE) == 0)
        {
            //if we are only encoding spaces, and ch is not a space or
            //if we are not encoding meta sequences and it is a dot or
            //if we not encoding percents and it is a percent
            if (((dwFlagsInternal & ATL_URL_ENCODE_SPACES_ONLY) && ch != ' ') ||
                ((dwFlagsInternal & ATL_URL_NO_META) && ch == '.') ||
                (((dwFlagsInternal & ATL_URL_ENCODE_PERCENT) == 0) && ch == '%'))
            {
                //just output it without encoding
                if (bRet)
                {
                    *szStringOut++ = ch;
                }
            }
            else
            {
                //if there is not enough space for the escape sequence
                //(dwMaxLength is tested first so that the unsigned
                //subtraction cannot wrap around for buffers under 3 chars)
                if (dwMaxLength < 3 || dwLen >= (dwMaxLength - 3))
                {
                    bRet = FALSE;
                }
                if (bRet)
                {
                    //output the percent, followed by the hex value of the character
                    LPSTR pszTmp = szStringOut;
                    *pszTmp++ = '%';
                    if (static_cast<unsigned char>(ch) < 16)
                    {
                        //pad to two hex digits
                        *pszTmp++ = '0';
                    }
                    Checked::ultoa_s(static_cast<unsigned char>(ch), pszTmp, szStringOutEnd - pszTmp, 16);
                    szStringOut += sizeof("%FF") - 1;
                }
                //two extra chars here; the third is counted by the common
                //increment at the bottom of the loop
                dwLen += sizeof("%FF") - 2;
            }
        }
        else //safe character
        {
            if (bRet)
            {
                *szStringOut++ = ch;
            }
        }
        dwLen++;
    }

    if (bRet && dwLen < dwMaxLength)
    {
        *szStringOut = '\0';
    }

    if (pdwStrLen)
    {
        *pdwStrLen = dwLen + 1;
    }

    //the terminator has to fit as well
    if (dwLen + 1 > dwMaxLength)
    {
        bRet = FALSE;
    }

    return bRet;
}
//Determine if the character is unsafe under the URI RFC document.
//Returns TRUE for the punctuation characters listed below and for any value
//below 32 or above 126 once chIn is viewed as an unsigned char; returns FALSE
//for every other character.
inline BOOL AtlIsUnsafeUrlChar(_In_ char chIn) throw()
{
    //Compare as unsigned so that chars with the high bit set land above 126
    //instead of turning negative
    unsigned char ch = static_cast<unsigned char>(chIn);
    switch (ch)
    {
        case ';': case '\\': case '?': case '@': case '&':
        case '=': case '+': case '$': case ',': case ' ':
        case '<': case '>': case '#': case '%': case '\"':
        case '{': case '}': case '|':
        case '^': case '[': case ']': case '`':
            return TRUE;
        default:
            //anything outside the range 32..126 is unsafe as well
            if (ch < 32 || ch > 126)
            {
                return TRUE;
            }
            return FALSE;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment