Skip to content

Instantly share code, notes, and snippets.

@wqweto
Created September 17, 2013 12:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wqweto/6593791 to your computer and use it in GitHub Desktop.
Save wqweto/6593791 to your computer and use it in GitHub Desktop.
//Convert all unsafe characters in szStringIn to escape sequences
//szStringIn and szStringOut should be different strings (enforced with ATLENSURE)
//
//Parameters:
//  szStringIn  - NUL-terminated input string
//  szStringOut - output buffer, dwMaxLength characters long
//  pdwStrLen   - optional; receives the length counted so far plus one for the
//                terminating NUL. When the buffer is too small the input is
//                still scanned to the end, so this is the size to allocate.
//  dwMaxLength - capacity of szStringOut in characters
//  dwFlags     - combination of ATL_URL_* flags; the ones examined here are
//                ATL_URL_CANONICALIZE, ATL_URL_DECODE, ATL_URL_BROWSER_MODE,
//                ATL_URL_NO_ENCODE, ATL_URL_COMBINE, ATL_URL_NO_META,
//                ATL_URL_ENCODE_SPACES_ONLY and ATL_URL_ENCODE_PERCENT
//
//Returns TRUE on success. Returns FALSE when the result (including the
//terminating NUL) does not fit in dwMaxLength characters, or when
//ATL_URL_DECODE is set and a '%' is followed by a non-hex digit.
inline _Success_(return != FALSE) BOOL AtlEscapeUrl(
	_In_z_ LPCSTR szStringIn,
	_Out_writes_to_(dwMaxLength, *pdwStrLen) LPSTR szStringOut,
	_Out_opt_ DWORD* pdwStrLen,
	_In_ DWORD dwMaxLength,
	_In_ DWORD dwFlags = 0)
{
	ATLENSURE( szStringIn != NULL );
	ATLENSURE( szStringOut != NULL );
	ATLENSURE( szStringIn != szStringOut );
	char ch;
	DWORD dwLen = 0;
	BOOL bRet = TRUE;
	BOOL bSchemeFile = FALSE;
	DWORD dwColonPos = 0;
	//working copy of the flags: ATL_URL_NO_ENCODE may be switched on mid-string
	DWORD dwFlagsInternal = dwFlags;
	//The next 2 are for buffer security checks
	LPSTR szOrigStringOut=szStringOut;
	LPSTR szStringOutEnd = (szStringOut + dwMaxLength);
	while((ch = *szStringIn++) != '\0')
	{
		//if we are at the maximum length, set bRet to FALSE
		//this ensures no more data is written to szStringOut, but
		//the length of the string is still updated, so the user
		//knows how much space to allocate
		if (dwLen == dwMaxLength)
		{
			bRet = FALSE;
		}
		//Keep track of the first ':' position to match the weird way
		//InternetCanonicalizeUrl handles it
		if (ch == ':' && (dwFlagsInternal & ATL_URL_CANONICALIZE) && !dwColonPos)
		{
			if (bRet)
			{
				//terminate what has been written so far so that the scheme
				//part can be lower-cased in place
				*szStringOut = '\0';
				LPSTR pszStrToLower=szStringOut-dwLen;
				ATLENSURE(pszStrToLower >= szOrigStringOut && pszStrToLower <= szStringOutEnd);
				Checked::strlwr_s(pszStrToLower,szStringOutEnd-pszStrToLower+1);
				if (dwLen == 4 && !strncmp("file", (szStringOut-4), 4))
				{
					bSchemeFile = TRUE;
				}
			}
			dwColonPos = dwLen+1;
		}
		else if (ch == '%' && (dwFlagsInternal & ATL_URL_DECODE))
		{
			//decode the escaped sequence
			//a sequence cut short by the end of the input stops processing
			//without changing bRet; a non-hex digit fails the call
			if (*szStringIn != '\0')
			{
				short nFirstDigit = AtlHexValue(*szStringIn++);
				if( nFirstDigit < 0 )
				{
					bRet = FALSE;
					break;
				}
				ch = static_cast<char>(16*nFirstDigit);
				if (*szStringIn != '\0')
				{
					short nSecondDigit = AtlHexValue(*szStringIn++);
					if( nSecondDigit < 0 )
					{
						bRet = FALSE;
						break;
					}
					ch = static_cast<char>(ch+nSecondDigit);
				}
				else
				{
					break;
				}
			}
			else
			{
				break;
			}
		}
		else if ((ch == '?' || ch == '#') && (dwFlagsInternal & ATL_URL_BROWSER_MODE))
		{
			//ATL_URL_BROWSER mode does not encode after a '?' or a '#'
			dwFlagsInternal |= ATL_URL_NO_ENCODE;
		}
		if ((dwFlagsInternal & ATL_URL_CANONICALIZE) && (dwFlagsInternal & ATL_URL_NO_ENCODE)==0)
		{
			//canonicalize the '\' to '/'
			if (ch == '\\' && (dwColonPos || (dwFlagsInternal & ATL_URL_COMBINE)) && bRet)
			{
				//if the scheme is not file or it is file and the '\' is in "file:\\"
				//NOTE: This is to match the way InternetCanonicalizeUrl handles this case
				if (!bSchemeFile || (dwLen < 7))
				{
					ch = '/';
				}
			}
			else if (ch == '.' && dwLen > 0 && (dwFlagsInternal & ATL_URL_NO_META)==0)
			{
				//if we are escaping meta sequences, attempt to do so
				//when the helper reports it handled the sequence, skip the
				//normal output path for this character
				if (AtlEscapeUrlMetaHelper(&szStringOut, szStringOut-1, dwLen, (char**)(&szStringIn), &dwLen, dwFlagsInternal, dwColonPos))
					continue;
			}
		}
		//if we are encoding and it is an unsafe character
		if (AtlIsUnsafeUrlChar(ch) && (dwFlagsInternal & ATL_URL_NO_ENCODE)==0)
		{
			//if we are only encoding spaces, and ch is not a space or
			//if we are not encoding meta sequences and it is a dot or
			//if we not encoding percents and it is a percent
			if (((dwFlagsInternal & ATL_URL_ENCODE_SPACES_ONLY) && ch != ' ') ||
				((dwFlagsInternal & ATL_URL_NO_META) && ch == '.') ||
				(((dwFlagsInternal & ATL_URL_ENCODE_PERCENT) == 0) && ch == '%'))
			{
				//just output it without encoding
				if (bRet)
					*szStringOut++ = ch;
			}
			else
			{
				//if there is not enough space for the escape sequence
				//dwMaxLength is unsigned, so (dwMaxLength-3) wraps around to a
				//huge value for buffers shorter than 3 characters; without the
				//explicit guard the check would pass and the escape sequence
				//would be written beyond the end of szStringOut
				if (dwMaxLength < 3 || dwLen >= (dwMaxLength-3))
				{
					bRet = FALSE;
				}
				if (bRet)
				{
					//output the percent, followed by the hex value of the character
					LPSTR pszTmp = szStringOut;
					*pszTmp++ = '%';
					//pad to two hex digits
					if ((unsigned char)ch < 16)
					{
						*pszTmp++ = '0';
					}
					Checked::ultoa_s((unsigned char)ch, pszTmp, szStringOutEnd-pszTmp, 16);
					szStringOut+= sizeof("%FF")-1;
				}
				//account for two of the three characters here; the third is
				//added by the common dwLen++ below
				dwLen += sizeof("%FF")-2;
			}
		}
		else //safe character
		{
			if (bRet)
				*szStringOut++ = ch;
		}
		dwLen++;
	}
	if (bRet && dwLen < dwMaxLength)
		*szStringOut = '\0';
	if (pdwStrLen)
		*pdwStrLen = dwLen + 1;
	//no room left for the terminating NUL
	if (dwLen+1 > dwMaxLength)
		bRet = FALSE;
	return bRet;
}
//Determine if the character is unsafe under the URI RFC document
//Returns TRUE for anything outside 32..126 and for the punctuation
//characters listed in the table below; FALSE for every other character.
inline BOOL AtlIsUnsafeUrlChar(_In_ char chIn) throw()
{
	const unsigned char uchValue = (unsigned char)chIn;

	//everything below space or above '~' is unsafe
	if (uchValue < 32 || uchValue > 126)
		return TRUE;

	//characters inside the 32..126 range that are still unsafe
	static const char s_szUnsafe[] =
	{
		';', '\\', '?', '@', '&',
		'=', '+', '$', ',', ' ',
		'<', '>', '#', '%', '\"',
		'{', '}', '|',
		'^', '[', ']', '`'
	};

	for (unsigned int nIndex = 0; nIndex < sizeof(s_szUnsafe); nIndex++)
	{
		if (uchValue == (unsigned char)s_szUnsafe[nIndex])
			return TRUE;
	}
	return FALSE;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment