Last active
June 17, 2017 13:25
-
-
Save ateneva/f97eb062f6b82ce29a8ceec1261a08f6 to your computer and use it in GitHub Desktop.
How to analyse the punctuation and length of a string with SQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
select | |
title as Title, | |
--count the number of words in the title | |
length(title) - length(replace(title, ' ', '')) + 1 as TitleWordLength, | |
--analyse the punctuation of a title | |
case | |
when left (title,1) in ('1', '2' ,'3' ,'4','5','6','7','8','9') then 'starts with a number' | |
when instr(title, '-') !=0 then 'contains a dash' | |
when instr(title, ':') !=0 then 'contains a colon' | |
when instr(title, ',') !=0 then 'contains a comma' | |
when instr(title, '"') !=0 then 'contains quoatation marks' | |
when right(title,1) = '!' then 'ends with an exclamation mark' | |
when right(title,1) = '?' then 'ends with a question mark' | |
when right(title, 1) = '.' then 'ends with a dot' | |
when right(title,1) not in ('?', '!', '.') then 'ends with no punctuation' | |
when right(title,1) in ('1', '2' ,'3' ,'4','5','6','7','8','9') then 'ends with a number' | |
end as TitlePunctuation, | |
--find the number of characters and classify according to the following criteria | |
length(title) as TitleCharLength, | |
case | |
when length(title) < 40 then '0-40 characters' | |
when length(title) >= 40 and length(title) < 70 then '40-70 charcaters' | |
when length(title) >= 70 then 'more than 70 characters' | |
end as TitleCharLengthClass, | |
---find the number of Upper case characters in the string | |
regexp_count(title, '[A-Z]') as NumberOfUpperCaseCharactersinTitle, | |
from | |
tbl_titles |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment