wqweto/AtlEscapeUrl

## AtlEscapeUrl
//Convert all unsafe characters in szStringIn to escape sequences ("%xx").
//szStringIn and szStringOut must be different buffers.
//
//Parameters:
//  szStringIn  - null-terminated input string
//  szStringOut - buffer that receives the converted string
//  pdwStrLen   - optional; receives the computed length plus one for the
//                terminating null. The length keeps being counted after the
//                buffer has filled up, so the caller can learn how much
//                space to allocate
//  dwMaxLength - capacity of szStringOut in characters
//  dwFlags     - combination of ATL_URL_* flags. This function looks at
//                ATL_URL_CANONICALIZE, ATL_URL_DECODE, ATL_URL_BROWSER_MODE,
//                ATL_URL_NO_ENCODE, ATL_URL_COMBINE, ATL_URL_NO_META,
//                ATL_URL_ENCODE_SPACES_ONLY and ATL_URL_ENCODE_PERCENT
//
//Returns TRUE when the whole result, including its terminating null, fit
//into szStringOut. Returns FALSE when the buffer was too small, or when
//ATL_URL_DECODE is set and a '%' is followed by a digit for which
//AtlHexValue returns a negative value.
inline _Success_(return != FALSE) BOOL AtlEscapeUrl(
	_In_z_ LPCSTR szStringIn,
	_Out_writes_to_(dwMaxLength, *pdwStrLen) LPSTR szStringOut,
	_Out_opt_ DWORD* pdwStrLen,
	_In_ DWORD dwMaxLength,
	_In_ DWORD dwFlags = 0)
{
	ATLENSURE( szStringIn != NULL );
	ATLENSURE( szStringOut != NULL );
	ATLENSURE( szStringIn != szStringOut );

	char ch;
	DWORD dwLen = 0;
	BOOL bRet = TRUE;
	BOOL bSchemeFile = FALSE;
	DWORD dwColonPos = 0;
	//working copy of the flags: ATL_URL_NO_ENCODE may be switched on part-way
	DWORD dwFlagsInternal = dwFlags;
	//The next 2 are for buffer security checks
	LPSTR szOrigStringOut=szStringOut;
	LPSTR szStringOutEnd = (szStringOut + dwMaxLength);

	while((ch = *szStringIn++) != '\0')
	{
		//if we are at the maximum length, set bRet to FALSE
		//this ensures no more data is written to szStringOut, but
		//the length of the string is still updated, so the user
		//knows how much space to allocate
		if (dwLen == dwMaxLength)
		{
			bRet = FALSE;
		}

		//Keep track of the first ':' position to match the weird way
		//InternetCanonicalizeUrl handles it
		if (ch == ':' && (dwFlagsInternal & ATL_URL_CANONICALIZE) && !dwColonPos)
		{
			if (bRet)
			{
				//terminate what has been written so far and lower-case it
				//(this is the text in front of the first ':')
				*szStringOut = '\0';
				LPSTR pszStrToLower=szStringOut-dwLen;
				ATLENSURE(pszStrToLower >= szOrigStringOut &&  pszStrToLower <= szStringOutEnd);
				//the size passed is exactly the space left in the buffer; the
				//terminator written above lies inside it because bRet implies
				//dwLen < dwMaxLength
				Checked::strlwr_s(pszStrToLower,szStringOutEnd-pszStrToLower);

				if (dwLen == 4 && !strncmp("file", (szStringOut-4), 4))
				{
					bSchemeFile = TRUE;
				}
			}

			dwColonPos = dwLen+1;
		}
		else if (ch == '%' && (dwFlagsInternal & ATL_URL_DECODE))
		{
			//decode the escaped sequence
			if (*szStringIn != '\0')
			{
				short nFirstDigit = AtlHexValue(*szStringIn++);

				if( nFirstDigit < 0 )
				{
					bRet = FALSE;
					break;
				}
				ch = static_cast<char>(16*nFirstDigit);
				if (*szStringIn != '\0')
				{
					short nSecondDigit = AtlHexValue(*szStringIn++);

					if( nSecondDigit < 0 )
					{
						bRet = FALSE;
						break;
					}
					ch = static_cast<char>(ch+nSecondDigit);
				}
				else
				{
					//input ended in the middle of the escape sequence
					break;
				}
			}
			else
			{
				//input ended right after the '%'
				break;
			}
		}
		else if ((ch == '?' || ch == '#') && (dwFlagsInternal & ATL_URL_BROWSER_MODE))
		{
			//ATL_URL_BROWSER mode does not encode after a '?' or a '#'
			dwFlagsInternal |= ATL_URL_NO_ENCODE;
		}

		if ((dwFlagsInternal & ATL_URL_CANONICALIZE) && (dwFlagsInternal & ATL_URL_NO_ENCODE)==0)
		{
			//canonicalize the '\' to '/'
			if (ch == '\\' && (dwColonPos || (dwFlagsInternal & ATL_URL_COMBINE)) && bRet)
			{
				//if the scheme is not file or it is file and the '\' is in "file:\\"
				//NOTE: This is to match the way InternetCanonicalizeUrl handles this case
				if (!bSchemeFile || (dwLen < 7))
				{
					ch = '/';
				}
			}
			else if (ch == '.' && dwLen > 0 && (dwFlagsInternal & ATL_URL_NO_META)==0)
			{
				//if we are escaping meta sequences, attempt to do so
				//(when the helper reports success the '.' is not emitted here)
				if (AtlEscapeUrlMetaHelper(&szStringOut, szStringOut-1, dwLen, (char**)(&szStringIn), &dwLen, dwFlagsInternal, dwColonPos))
					continue;
			}
		}

		//if we are encoding and it is an unsafe character
		if (AtlIsUnsafeUrlChar(ch) && (dwFlagsInternal & ATL_URL_NO_ENCODE)==0)
		{
			//if we are only encoding spaces, and ch is not a space or
			//if we are not encoding meta sequences and it is a dot or
			//if we not encoding percents and it is a percent
			if (((dwFlagsInternal & ATL_URL_ENCODE_SPACES_ONLY) && ch != ' ') ||
				((dwFlagsInternal & ATL_URL_NO_META) && ch == '.') ||
				(((dwFlagsInternal & ATL_URL_ENCODE_PERCENT) == 0) && ch == '%'))
			{
				//just output it without encoding
				if (bRet)
					*szStringOut++ = ch;
			}
			else
			{
				//if there is not enough space for the escape sequence
				//(dwMaxLength is unsigned: test for < 3 first so that the
				//subtraction below cannot wrap around for tiny buffers)
				if (dwMaxLength < 3 || dwLen >= (dwMaxLength-3))
				{
					bRet = FALSE;
				}
				if (bRet)
				{
					//output the percent, followed by the hex value of the character
					LPSTR pszTmp = szStringOut;
					*pszTmp++ = '%';
					if ((unsigned char)ch < 16)
					{
						//pad to two hex digits
						*pszTmp++ = '0';
					}
					Checked::ultoa_s((unsigned char)ch, pszTmp, szStringOutEnd-pszTmp, 16);
					szStringOut+= sizeof("%FF")-1;
				}
				//count two of the three characters here, the third is
				//counted by the dwLen++ at the bottom of the loop
				dwLen += sizeof("%FF")-2;
			}
		}
		else //safe character
		{
			if (bRet)
				*szStringOut++ = ch;
		}
		dwLen++;
	}

	if (bRet && dwLen < dwMaxLength)
		*szStringOut = '\0';

	if (pdwStrLen)
		*pdwStrLen = dwLen + 1;

	//the terminating null has to fit as well
	if (dwLen+1 > dwMaxLength)
		bRet = FALSE;

	return bRet;
}

## AtlIsUnsafeUrlChar
//Determine if the character is unsafe under the URI RFC document.
//Returns TRUE for every character below 32 or above 126 and for the
//punctuation characters listed in the switch; returns FALSE otherwise.
inline BOOL AtlIsUnsafeUrlChar(_In_ char chIn) throw()
{
	//compare as unsigned so that characters with the high bit set are > 126
	const unsigned char uchValue = static_cast<unsigned char>(chIn);

	//anything outside the range 32..126 is unsafe
	if (uchValue < 32 || uchValue > 126)
		return TRUE;

	//inside that range only this punctuation is unsafe
	switch (uchValue)
	{
		case ' ':  case '\"': case '#': case '$': case '%': case '&':
		case '+':  case ',':
		case ';':  case '<':  case '=': case '>': case '?': case '@':
		case '[':  case '\\': case ']': case '^': case '`':
		case '{':  case '|':  case '}':
			return TRUE;
	}

	return FALSE;
}
	//Convert all unsafe characters in szStringIn to escape sequences ("%xx").
	//szStringIn and szStringOut must be different buffers.
	//
	//Parameters:
	//  szStringIn  - null-terminated input string
	//  szStringOut - buffer that receives the converted string
	//  pdwStrLen   - optional; receives the computed length plus one for the
	//                terminating null. The length keeps being counted after the
	//                buffer has filled up, so the caller can learn how much
	//                space to allocate
	//  dwMaxLength - capacity of szStringOut in characters
	//  dwFlags     - combination of ATL_URL_* flags. This function looks at
	//                ATL_URL_CANONICALIZE, ATL_URL_DECODE, ATL_URL_BROWSER_MODE,
	//                ATL_URL_NO_ENCODE, ATL_URL_COMBINE, ATL_URL_NO_META,
	//                ATL_URL_ENCODE_SPACES_ONLY and ATL_URL_ENCODE_PERCENT
	//
	//Returns TRUE when the whole result, including its terminating null, fit
	//into szStringOut. Returns FALSE when the buffer was too small, or when
	//ATL_URL_DECODE is set and a '%' is followed by a digit for which
	//AtlHexValue returns a negative value.
	inline _Success_(return != FALSE) BOOL AtlEscapeUrl(
		_In_z_ LPCSTR szStringIn,
		_Out_writes_to_(dwMaxLength, *pdwStrLen) LPSTR szStringOut,
		_Out_opt_ DWORD* pdwStrLen,
		_In_ DWORD dwMaxLength,
		_In_ DWORD dwFlags = 0)
	{
		ATLENSURE( szStringIn != NULL );
		ATLENSURE( szStringOut != NULL );
		ATLENSURE( szStringIn != szStringOut );

		char ch;
		DWORD dwLen = 0;
		BOOL bRet = TRUE;
		BOOL bSchemeFile = FALSE;
		DWORD dwColonPos = 0;
		//working copy of the flags: ATL_URL_NO_ENCODE may be switched on part-way
		DWORD dwFlagsInternal = dwFlags;
		//The next 2 are for buffer security checks
		LPSTR szOrigStringOut=szStringOut;
		LPSTR szStringOutEnd = (szStringOut + dwMaxLength);

		while((ch = *szStringIn++) != '\0')
		{
			//if we are at the maximum length, set bRet to FALSE
			//this ensures no more data is written to szStringOut, but
			//the length of the string is still updated, so the user
			//knows how much space to allocate
			if (dwLen == dwMaxLength)
			{
				bRet = FALSE;
			}

			//Keep track of the first ':' position to match the weird way
			//InternetCanonicalizeUrl handles it
			if (ch == ':' && (dwFlagsInternal & ATL_URL_CANONICALIZE) && !dwColonPos)
			{
				if (bRet)
				{
					//terminate what has been written so far and lower-case it
					//(this is the text in front of the first ':')
					*szStringOut = '\0';
					LPSTR pszStrToLower=szStringOut-dwLen;
					ATLENSURE(pszStrToLower >= szOrigStringOut && pszStrToLower <= szStringOutEnd);
					//the size passed is exactly the space left in the buffer; the
					//terminator written above lies inside it because bRet implies
					//dwLen < dwMaxLength
					Checked::strlwr_s(pszStrToLower,szStringOutEnd-pszStrToLower);

					if (dwLen == 4 && !strncmp("file", (szStringOut-4), 4))
					{
						bSchemeFile = TRUE;
					}
				}

				dwColonPos = dwLen+1;
			}
			else if (ch == '%' && (dwFlagsInternal & ATL_URL_DECODE))
			{
				//decode the escaped sequence
				if (*szStringIn != '\0')
				{
					short nFirstDigit = AtlHexValue(*szStringIn++);

					if( nFirstDigit < 0 )
					{
						bRet = FALSE;
						break;
					}
					ch = static_cast<char>(16*nFirstDigit);
					if (*szStringIn != '\0')
					{
						short nSecondDigit = AtlHexValue(*szStringIn++);

						if( nSecondDigit < 0 )
						{
							bRet = FALSE;
							break;
						}
						ch = static_cast<char>(ch+nSecondDigit);
					}
					else
					{
						//input ended in the middle of the escape sequence
						break;
					}
				}
				else
				{
					//input ended right after the '%'
					break;
				}
			}
			else if ((ch == '?' || ch == '#') && (dwFlagsInternal & ATL_URL_BROWSER_MODE))
			{
				//ATL_URL_BROWSER mode does not encode after a '?' or a '#'
				dwFlagsInternal |= ATL_URL_NO_ENCODE;
			}

			if ((dwFlagsInternal & ATL_URL_CANONICALIZE) && (dwFlagsInternal & ATL_URL_NO_ENCODE)==0)
			{
				//canonicalize the '\' to '/'
				if (ch == '\\' && (dwColonPos || (dwFlagsInternal & ATL_URL_COMBINE)) && bRet)
				{
					//if the scheme is not file or it is file and the '\' is in "file:\\"
					//NOTE: This is to match the way InternetCanonicalizeUrl handles this case
					if (!bSchemeFile || (dwLen < 7))
					{
						ch = '/';
					}
				}
				else if (ch == '.' && dwLen > 0 && (dwFlagsInternal & ATL_URL_NO_META)==0)
				{
					//if we are escaping meta sequences, attempt to do so
					//(when the helper reports success the '.' is not emitted here)
					if (AtlEscapeUrlMetaHelper(&szStringOut, szStringOut-1, dwLen, (char**)(&szStringIn), &dwLen, dwFlagsInternal, dwColonPos))
						continue;
				}
			}

			//if we are encoding and it is an unsafe character
			if (AtlIsUnsafeUrlChar(ch) && (dwFlagsInternal & ATL_URL_NO_ENCODE)==0)
			{
				//if we are only encoding spaces, and ch is not a space or
				//if we are not encoding meta sequences and it is a dot or
				//if we not encoding percents and it is a percent
				if (((dwFlagsInternal & ATL_URL_ENCODE_SPACES_ONLY) && ch != ' ') ||
					((dwFlagsInternal & ATL_URL_NO_META) && ch == '.') ||
					(((dwFlagsInternal & ATL_URL_ENCODE_PERCENT) == 0) && ch == '%'))
				{
					//just output it without encoding
					if (bRet)
						*szStringOut++ = ch;
				}
				else
				{
					//if there is not enough space for the escape sequence
					//(dwMaxLength is unsigned: test for < 3 first so that the
					//subtraction below cannot wrap around for tiny buffers)
					if (dwMaxLength < 3 || dwLen >= (dwMaxLength-3))
					{
						bRet = FALSE;
					}
					if (bRet)
					{
						//output the percent, followed by the hex value of the character
						LPSTR pszTmp = szStringOut;
						*pszTmp++ = '%';
						if ((unsigned char)ch < 16)
						{
							//pad to two hex digits
							*pszTmp++ = '0';
						}
						Checked::ultoa_s((unsigned char)ch, pszTmp, szStringOutEnd-pszTmp, 16);
						szStringOut+= sizeof("%FF")-1;
					}
					//count two of the three characters here, the third is
					//counted by the dwLen++ at the bottom of the loop
					dwLen += sizeof("%FF")-2;
				}
			}
			else //safe character
			{
				if (bRet)
					*szStringOut++ = ch;
			}
			dwLen++;
		}

		if (bRet && dwLen < dwMaxLength)
			*szStringOut = '\0';

		if (pdwStrLen)
			*pdwStrLen = dwLen + 1;

		//the terminating null has to fit as well
		if (dwLen+1 > dwMaxLength)
			bRet = FALSE;

		return bRet;
	}
	//Determine if the character is unsafe under the URI RFC document.
	//Returns TRUE for the punctuation characters listed in the switch and for
	//every character below 32 or above 126; returns FALSE otherwise.
	inline BOOL AtlIsUnsafeUrlChar(_In_ char chIn) throw()
	{
		//compare as unsigned so that characters with the high bit set are > 126
		unsigned char ch = (unsigned char)chIn;
		switch(ch)
		{
			case ';': case '\\': case '?': case '@': case '&':
			case '=': case '+': case '$': case ',': case ' ':
			case '<': case '>': case '#': case '%': case '\"':
			case '{': case '}': case '|':
			case '^': case '[': case ']': case '`':
				return TRUE;
			default:
			{
				//anything outside the range 32..126 is unsafe as well
				if (ch < 32 || ch > 126)
					return TRUE;
				return FALSE;
			}
		}
	}