Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save GiscardBiamby/267881 to your computer and use it in GitHub Desktop.
Save GiscardBiamby/267881 to your computer and use it in GitHub Desktop.
use tprschools
go
set transaction isolation level read uncommitted
go
declare @ascii_chars varchar(150)
declare @extended_ascii varchar(500)
declare @ascii_code int
set @ascii_code = 0
set @ascii_chars = ''
set @extended_ascii = ''
-- // Construct list of ascii chars:
while (@ascii_code <= 255)
begin
if (@ascii_code <= 127)
begin
set @ascii_chars = @ascii_chars + char(@ascii_code)
end else begin
set @extended_ascii = @extended_ascii + char(@ascii_code)
end
set @ascii_code = @ascii_code + 1
end
print 'ascii chars (len='+cast(len(@ascii_chars) as varchar)+'): ''' + @ascii_chars + ''''
-- // Escape values that are reserved chars in SQL like patterns:
set @ascii_chars = replace(@ascii_chars,'\','\\')
set @ascii_chars = replace(@ascii_chars,'^','\^')
set @ascii_chars = replace(@ascii_chars,']','\]')
set @ascii_chars = replace(@ascii_chars,'-','\-')
set @ascii_chars = replace(@ascii_chars,'[','\[')
set @ascii_chars = replace(@ascii_chars,'_','\_')
set @ascii_chars = replace(@ascii_chars,'%','\%')
set @ascii_chars = replace(@ascii_chars,'''','''''')
print 'ascii chars (len='+cast(len(@ascii_chars) as varchar)+'): ''' + @ascii_chars + ''''
print '@extended ascii chars (len='+cast(len(@extended_ascii) as varchar)+'): ''' + @extended_ascii + ''''
-- //
-- // First prove @ascii_char is properly escaped:
select distinct
pattern, [desc]
, case when f.pattern like '%[^'+@ascii_chars+']%' escape '\' then 'NOT 7-bit ASCII' end '(excl. match)'
, case when f.pattern like '%['+@extended_ascii+']%' escape '\' then 'NOT 7-bit ASCII' end '(incl. match)'
, case when patindex('%['+@extended_ascii+']%', f.pattern) > 0 then 'NOT 7-bit ASCII' end '(patindex match)'
, patindex('%['+@extended_ascii+']%', f.pattern) as pat_index
from
(
select 'normal text' pattern, 'normal, ascii chars' 'desc'
union select '‚', 'a NON-ASCII CHAR'
union select '[', 'left sq. bracket'
union select ']', 'right sq. bracket'
union select '_', 'underscore'
union select '-', 'minus sign is a special char when using like ''%[]%'''
union select '%', 'percent'
union select '^', 'caret'
union select '''', 'single quote'
union select '\', 'what we are using as the escape char'
) f
-- //
-- // find values w/ non-7-bit-ascii, also include sneak-peek of text surrounding first occurance of the value:
select distinct
pattern, [desc]
, case when f.pattern like '%[^'+@ascii_chars+']%' escape '\' then 'NOT 7-bit ASCII' end '(excl. match)'
, case when f.pattern like '%['+@extended_ascii+']%' escape '\' then 'NOT 7-bit ASCII' end '(incl. match)'
, case when patindex('%['+@extended_ascii+']%', f.pattern) > 0 then 'NOT 7-bit ASCII' end '(patindex match)'
, patindex('%['+@extended_ascii+']%', f.pattern) as first_index_of_non_ascii
, substring(f.pattern,patindex('%['+@extended_ascii+']%', f.pattern),1) first_non_ascii_char
, ascii(substring(f.pattern,patindex('%['+@extended_ascii+']%', f.pattern),1)) matching_char_ascii_code
, patindex('%['+@extended_ascii+']%', f.pattern) index_of_first_matching_char
, substring(f.pattern,patindex('%['+@extended_ascii+']%', f.pattern)-5,10) surrounding_text
from
(
select 'normal text' pattern, 'normal, ascii chars' 'desc'
union select '[', 'left sq. bracket'
union select ']', 'right sq. bracket'
union select '_', 'underscore'
union select '-', 'minus sign is a special char when using like ''%[]%'''
union select '%', 'percent'
union select '^', 'caret'
union select '''', 'single quote'
union select 'some normal text, basic ascii charset', 'you shouldn''t see this in the query results'
-- // none of above rows should be returned, but the remaining 5 should be in the result set:
union select ' consist of a ½ hour long ses', 'random text sample'
union select 'ublic service ¡V whether that', 'random text sample'
union select 'uality Matters© ad well as by', 'random text sample'
union select 'l store front ¬electronic sub', 'random text sample'
union select 'This one has two non-ascii chars. The query will only show the first one. l store front ¬electronic sub. It has major issues though with 1080i HDTV. Interlaced material is not properly deinterlaced and this is only under VMR9. l store front ¬electronic sub.', 'random text sample'
) f
where
f.pattern like '%['+@extended_ascii+']%'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment