Skip to content

Instantly share code, notes, and snippets.

@gingerwizard
Created February 21, 2024 16:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gingerwizard/d3b32ed801973498e87145ed0c6e4bdb to your computer and use it in GitHub Desktop.
Save gingerwizard/d3b32ed801973498e87145ed0c6e4bdb to your computer and use it in GitHub Desktop.
CREATE TABLE surveys
(
    `response_id` Int64,
    `development_activity` Enum8('I am a developer by profession' = 1, 'I am a student who is learning to code' = 2, 'I am not primarily a developer, but I write code sometimes as part of my work' = 3, 'I code primarily as a hobby' = 4, 'I used to be a developer by profession, but no longer am' = 5, 'None of these' = 6, 'NA' = 7),
    `employment` Enum8('Independent contractor, freelancer, or self-employed' = 1, 'Student, full-time' = 2, 'Employed full-time' = 3, 'Student, part-time' = 4, 'I prefer not to say' = 5, 'Employed part-time' = 6, 'Not employed, but looking for work' = 7, 'Retired' = 8, 'Not employed, and not looking for work' = 9, 'NA' = 10),
    `country` LowCardinality(String),
    `us_state` LowCardinality(String),
    `uk_county` LowCardinality(String),
    `education_level` Enum8('Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)' = 1, 'Bachelor’s degree (B.A., B.S., B.Eng., etc.)' = 2, 'Master’s degree (M.A., M.S., M.Eng., MBA, etc.)' = 3, 'Other doctoral degree (Ph.D., Ed.D., etc.)' = 4, 'Some college/university study without earning a degree' = 5, 'Something else' = 6, 'Professional degree (JD, MD, etc.)' = 7, 'Primary/elementary school' = 8, 'Associate degree (A.A., A.S., etc.)' = 9, 'NA' = 10),
    `age_started_to_code` Enum8('Younger than 5 years' = 1, '5 - 10 years' = 2, '11 - 17 years' = 3, '18 - 24 years' = 4, '25 - 34 years' = 5, '35 - 44 years' = 6, '45 - 54 years' = 7, '55 - 64 years' = 8, 'Older than 64 years' = 9, 'NA' = 10),
    `how_learned_to_code` Enum8('Coding Bootcamp' = 1, 'Other online resources (ex: videos, blogs, etc)' = 2, 'School' = 3, 'Online Forum' = 4, 'Friend or family member' = 5, 'Online Courses or Certification' = 6, 'Other (please specify):' = 7, 'Colleague' = 8, 'Books / Physical media' = 9, 'NA' = 10),
    `years_coding` Nullable(UInt8),
    `years_as_a_professional_developer` Nullable(UInt8),
    `developer_type` String,
    `organization_size` Enum8('Just me - I am a freelancer, sole proprietor, etc.' = 1, '2 to 9 employees' = 2, '10 to 19 employees' = 3, '20 to 99 employees' = 4, '100 to 499 employees' = 5, '500 to 999 employees' = 6, '1,000 to 4,999 employees' = 7, '5,000 to 9,999 employees' = 8, '10,000 or more employees' = 9, 'I don’t know' = 10, 'NA' = 11),
    `compensation_total` Nullable(UInt64),
    `compensation_frequency` Enum8('Weekly' = 1, 'Monthly' = 2, 'Yearly' = 3, 'NA' = 4),
    `language_have_worked_with` String,
    `language_want_to_work_with` String,
    `database_have_worked_with` String,
    `database_want_to_work_with` String,
    `platform_have_worked_with` String,
    `platform_want_to_work_with` String,
    `web_framework_have_worked_with` String,
    `web_framework_want_to_work` String,
    `other_tech_have_worked_with` String,
    `other_tech_want_to_work` String,
    `infrastructure_tools_have_worked_with` String,
    `infrastructure_tools_want_to_work_with` String,
    `developer_tools_have_worked_with` String,
    `developer_tools_want_to_work_with` String,
    `operating_system` Enum8('MacOS' = 1, 'Windows' = 2, 'Linux-based' = 3, 'BSD' = 4, 'Other (please specify):' = 5, 'Windows Subsystem for Linux (WSL)' = 6, 'NA' = 7),
    `frequency_visit_stackoverflow` Enum8('Multiple times per day' = 1, 'Daily or almost daily' = 2, 'A few times per week' = 3, 'A few times per month or weekly' = 4, 'Less than once per month or monthly' = 5, 'NA' = 6),
    `has_stackoverflow_account` Enum8('Yes' = 1, 'No' = 2, 'Not sure/can\'t remember' = 3, 'NA' = 4),
    `frequency_use_in_stackoverflow` Enum8('Multiple times per day' = 1, 'Daily or almost daily' = 2, 'A few times per week' = 3, 'A few times per month or weekly' = 4, 'Less than once per month or monthly' = 5, 'I have never participated in Q&A on Stack Overflow' = 6, 'NA' = 7),
    `consider_self_active_community_member` Enum8('Yes, definitely' = 1, 'Neutral' = 2, 'Yes, somewhat' = 3, 'No, not at all' = 4, 'No, not really' = 5, 'NA' = 6, 'Not sure' = 7),
    `member_other_communities` Enum8('Yes' = 1, 'No' = 2, 'NA' = 4),
    `age` Enum8('Under 18 years old' = 1, '18-24 years old' = 2, '25-34 years old' = 3, '35-44 years old' = 4, '45-54 years old' = 5, '55-64 years old' = 6, '65 years or older' = 7, 'NA' = 8, 'Prefer not to say' = 9),
    `annual_salary` Nullable(UInt64)) ENGINE = MergeTree ORDER BY tuple()
    
    
INSERT INTO surveys SELECT
    ResponseId as response_id,
    MainBranch as development_activity,
    Employment as employment,
    Country as country,
    US_State as us_state,
    UK_Country as uk_country,
    EdLevel as education_level,
    Age1stCode as age_started_to_code,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(LearnCode))) as how_learned_to_code,
    multiIf(YearsCode = 'Less than 1 year', 0, YearsCode = 'More than 50 years', 51, toUInt8OrNull(YearsCode)) as years_coding,
    multiIf(YearsCodePro = 'Less than 1 year', 0, YearsCodePro = 'More than 50 years', 51, toUInt8OrNull(YearsCodePro)) as years_as_a_professional_developer,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(DevType))) AS developer_type,
    OrgSize as organization_size,
    CompTotal as compensation_total,
    CompFreq as compensation_frequency,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(LanguageHaveWorkedWith))) AS language_have_worked_with,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(LanguageWantToWorkWith))) AS language_want_to_work_with,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(DatabaseHaveWorkedWith))) AS database_have_worked_with,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(DatabaseWantToWorkWith))) AS database_want_to_work_with,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(PlatformHaveWorkedWith))) AS platform_have_worked_with,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(PlatformWantToWorkWith))) AS platform_want_to_work_with,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(WebframeHaveWorkedWith))) AS web_framework_have_worked_with,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(WebframeWantToWorkWith))) AS web_framework_want_to_work,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(MiscTechHaveWorkedWith))) AS other_tech_have_worked_with,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(MiscTechWantToWorkWith))) AS other_tech_want_to_work,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(ToolsTechHaveWorkedWith))) AS infrastructure_tools_have_worked_with,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(ToolsTechWantToWorkWith))) AS infrastructure_tools_want_to_work_with,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(NEWCollabToolsHaveWorkedWith))) AS developer_tools_have_worked_with,
    arrayFilter(x -> x != 'NA', splitByChar(';', assumeNotNull(NEWCollabToolsWantToWorkWith))) AS developer_tools_want_to_work_with,
    OpSys as operating_system,
    SOVisitFreq as frequency_visit_stackoverflow,
    SOAccount as has_stackoverflow_account,
    SOPartFreq as frequency_use_in_stackoverflow,
    SOComm as consider_self_active_community_member,
    NEWOtherComms as member_other_communities,
    Age as age,
    ConvertedCompYearly as annual_salary
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/surveys/2021/survey_results_public.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment