Created
September 17, 2013 12:38
-
-
Save wqweto/6593791 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Convert all unsafe characters in szStringIn to escape sequences, optionally
//canonicalizing and/or decoding the string on the way (selected by dwFlags).
//szStringIn and szStringOut should be different strings.
//
//  szStringIn  - NUL-terminated input; must not be NULL
//  szStringOut - buffer that receives the result; must not be NULL and must
//                not be the same pointer as szStringIn
//  pdwStrLen   - optional; receives the computed length plus one for the
//                terminating NUL. When the buffer is too small the input is
//                still walked to the end, so this is the size to allocate
//  dwMaxLength - capacity of szStringOut, in characters
//  dwFlags     - combination of ATL_URL_* flags (default 0)
//
//Returns TRUE on success. Returns FALSE when the result (including the
//terminating NUL) does not fit in dwMaxLength characters, or when
//ATL_URL_DECODE is set and a '%' is followed by a character that
//AtlHexValue rejects.
inline _Success_(return != FALSE) BOOL AtlEscapeUrl(
    _In_z_ LPCSTR szStringIn,
    _Out_writes_to_(dwMaxLength, *pdwStrLen) LPSTR szStringOut,
    _Out_opt_ DWORD* pdwStrLen,
    _In_ DWORD dwMaxLength,
    _In_ DWORD dwFlags = 0)
{
    ATLENSURE( szStringIn != NULL );
    ATLENSURE( szStringOut != NULL );
    ATLENSURE( szStringIn != szStringOut );

    char ch;
    DWORD dwLen = 0;            //number of output characters accounted for so far
    BOOL bRet = TRUE;           //once FALSE, nothing more is written to szStringOut
    BOOL bSchemeFile = FALSE;   //TRUE once the text before the first ':' is found to be "file"
    DWORD dwColonPos = 0;       //1-based position of the first ':' (0 == none seen yet)
    DWORD dwFlagsInternal = dwFlags;    //working copy; ATL_URL_NO_ENCODE may be switched on mid-string

    //The next 2 are for buffer security checks
    LPSTR szOrigStringOut=szStringOut;
    LPSTR szStringOutEnd = (szStringOut + dwMaxLength);

    while((ch = *szStringIn++) != '\0')
    {
        //if we are at the maximum length, set bRet to FALSE
        //this ensures no more data is written to szStringOut, but
        //the length of the string is still updated, so the user
        //knows how much space to allocate
        if (dwLen == dwMaxLength)
        {
            bRet = FALSE;
        }

        //Keep track of the first ':' position to match the weird way
        //InternetCanonicalizeUrl handles it
        if (ch == ':' && (dwFlagsInternal & ATL_URL_CANONICALIZE) && !dwColonPos)
        {
            if (bRet)
            {
                //terminate what has been written so far and lower-case it in place
                *szStringOut = '\0';
                LPSTR pszStrToLower=szStringOut-dwLen;
                ATLENSURE(pszStrToLower >= szOrigStringOut && pszStrToLower <= szStringOutEnd);
                //the size passed is exactly the space left in the buffer from
                //pszStrToLower onwards; the terminator written above lies inside it
                Checked::strlwr_s(pszStrToLower,szStringOutEnd-pszStrToLower);
                if (dwLen == 4 && !strncmp("file", (szStringOut-4), 4))
                {
                    bSchemeFile = TRUE;
                }
            }
            dwColonPos = dwLen+1;
        }
        else if (ch == '%' && (dwFlagsInternal & ATL_URL_DECODE))
        {
            //decode the escaped sequence
            //NOTE(review): a '%' that is cut short by the end of the input
            //("%" or "%X") leaves the loop without setting bRet to FALSE, so
            //the partial escape is dropped silently - confirm this is intended
            if (*szStringIn != '\0')
            {
                short nFirstDigit = AtlHexValue(*szStringIn++);
                if( nFirstDigit < 0 )
                {
                    bRet = FALSE;
                    break;
                }
                ch = static_cast<char>(16*nFirstDigit);
                if (*szStringIn != '\0')
                {
                    short nSecondDigit = AtlHexValue(*szStringIn++);
                    if( nSecondDigit < 0 )
                    {
                        bRet = FALSE;
                        break;
                    }
                    ch = static_cast<char>(ch+nSecondDigit);
                }
                else
                {
                    break;
                }
            }
            else
            {
                break;
            }
        }
        else if ((ch == '?' || ch == '#') && (dwFlagsInternal & ATL_URL_BROWSER_MODE))
        {
            //ATL_URL_BROWSER mode does not encode after a '?' or a '#'
            dwFlagsInternal |= ATL_URL_NO_ENCODE;
        }

        if ((dwFlagsInternal & ATL_URL_CANONICALIZE) && (dwFlagsInternal & ATL_URL_NO_ENCODE)==0)
        {
            //canonicalize the '\' to '/'
            if (ch == '\\' && (dwColonPos || (dwFlagsInternal & ATL_URL_COMBINE)) && bRet)
            {
                //if the scheme is not file or it is file and the '\' is in "file:\\"
                //NOTE: This is to match the way InternetCanonicalizeUrl handles this case
                if (!bSchemeFile || (dwLen < 7))
                {
                    ch = '/';
                }
            }
            else if (ch == '.' && dwLen > 0 && (dwFlagsInternal & ATL_URL_NO_META)==0)
            {
                //if we are escaping meta sequences, attempt to do so
                //when the helper reports that it handled the sequence, this
                //character needs no further processing
                if (AtlEscapeUrlMetaHelper(&szStringOut, szStringOut-1, dwLen, (char**)(&szStringIn), &dwLen, dwFlagsInternal, dwColonPos))
                    continue;
            }
        }

        //if we are encoding and it is an unsafe character
        if (AtlIsUnsafeUrlChar(ch) && (dwFlagsInternal & ATL_URL_NO_ENCODE)==0)
        {
            //if we are only encoding spaces, and ch is not a space or
            //if we are not encoding meta sequences and it is a dot or
            //if we not encoding percents and it is a percent
            if (((dwFlagsInternal & ATL_URL_ENCODE_SPACES_ONLY) && ch != ' ') ||
                ((dwFlagsInternal & ATL_URL_NO_META) && ch == '.') ||
                (((dwFlagsInternal & ATL_URL_ENCODE_PERCENT) == 0) && ch == '%'))
            {
                //just output it without encoding
                if (bRet)
                    *szStringOut++ = ch;
            }
            else
            {
                //if there is not enough space for the escape sequence
                //dwMaxLength is tested first so that the unsigned subtraction
                //cannot wrap around for buffers shorter than 3 characters
                if (dwMaxLength < 3 || dwLen >= (dwMaxLength-3))
                {
                    bRet = FALSE;
                }
                if (bRet)
                {
                    //output the percent, followed by the hex value of the character
                    LPSTR pszTmp = szStringOut;
                    *pszTmp++ = '%';
                    //values below 16 convert to a single hex digit, so pad to two
                    if ((unsigned char)ch < 16)
                    {
                        *pszTmp++ = '0';
                    }
                    Checked::ultoa_s((unsigned char)ch, pszTmp, szStringOutEnd-pszTmp, 16);
                    szStringOut+= sizeof("%FF")-1;
                }
                //two of the three characters are counted here, the third by
                //the dwLen++ at the bottom of the loop
                dwLen += sizeof("%FF")-2;
            }
        }
        else //safe character
        {
            if (bRet)
                *szStringOut++ = ch;
        }
        dwLen++;
    }

    //terminate the output only if everything fitted
    if (bRet && dwLen < dwMaxLength)
        *szStringOut = '\0';

    //report the length including the terminating NUL
    if (pdwStrLen)
        *pdwStrLen = dwLen + 1;

    //no room left for the terminating NUL
    if (dwLen+1 > dwMaxLength)
        bRet = FALSE;

    return bRet;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Determine if the character is unsafe under the URI RFC document.
//Returns TRUE for every value below 32 or above 126, and for the
//punctuation characters listed in the body; FALSE for everything else.
inline BOOL AtlIsUnsafeUrlChar(_In_ char chIn) throw()
{
    //compare as unsigned so that characters with the high bit set
    //land above 126 instead of below zero
    const unsigned char uchValue = static_cast<unsigned char>(chIn);

    //everything outside the range 32..126 is unsafe
    if (uchValue < 32 || uchValue > 126)
        return TRUE;

    //characters inside that range which are nevertheless unsafe
    const char* pszUnsafe = ";\\?@&=+$, <>#%\"{}|^[]`";
    for (; *pszUnsafe != '\0'; ++pszUnsafe)
    {
        if (static_cast<unsigned char>(*pszUnsafe) == uchValue)
            return TRUE;
    }

    return FALSE;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment