Amir-Sak/sql

## sql
-- SQL Project - Data Cleaning

-- Step 1: Selecting all data from the 'layoffs' table
SELECT *
FROM layoffs;

-- Remove Duplicates, Standardize Data, Handle Null Values or Blanks, and Remove any Unnecessary Columns

-- Creating a backup table for precautionary purposes
CREATE TABLE layoffs_test_backup LIKE layoffs;

-- Copying data from the 'layoffs' table to the backup table
INSERT INTO layoffs_test_backup
SELECT *
FROM layoffs;

-- -------------------------------------------------------------------
-- Task 1: Removing Duplicates

-- Checking for duplicate rows in the 'layoffs' table
WITH duplicate_cte AS (
    SELECT *,
           ROW_NUMBER() OVER (
               PARTITION BY company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions
               ORDER BY company -- Ordering can be adjusted as needed
           ) AS row_num
    FROM world_layoffs.layoffs_test
)

-- Selecting duplicate rows
SELECT *
FROM duplicate_cte
WHERE row_num > 1;

-- Example: Checking duplicates for a specific company (e.g., Casper)
SELECT *
FROM world_layoffs.layoffs_test
WHERE company = 'Casper';

-- Deleting duplicate rows from the 'layoffs' table
SET SQL_SAFE_UPDATES = 0; -- Enabling deletion without WHERE clause (Use with caution)
DELETE FROM world_layoffs.layoffs_test
WHERE (company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions) IN (
    SELECT company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions
    FROM duplicate_cte
    WHERE row_num > 1
);

-- Resetting SQL_SAFE_UPDATES to its default value
SET SQL_SAFE_UPDATES = 1;

-- Verifying that duplicates are removed
SELECT *
FROM world_layoffs.layoffs_test
WHERE row_num > 1;

-- Creating a new table 'layoffs_test2' for further analysis
CREATE TABLE `layoffs_test2` (
  `company` TEXT,
  `location` TEXT,
  `industry` TEXT,
  `total_laid_off` BIGINT DEFAULT NULL,
  `percentage_laid_off` TEXT,
  `date` TEXT,
  `stage` TEXT,
  `country` TEXT,
  `funds_raised_millions` INT DEFAULT NULL,
   `row_num` INT
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;

-- Populating 'layoffs_test2' table with cleaned data
INSERT INTO world_layoffs.layoffs_test2
SELECT *,
ROW_NUMBER() OVER(
PARTITION BY company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions) AS row_num
FROM world_layoffs.layoffs_test;

-- Viewing cleaned data in 'layoffs_test2' table
SELECT *
FROM world_layoffs.layoffs_test2
WHERE row_num > 1;

-- Deleting unnecessary duplicate rows from 'layoffs_test2' table
SET SQL_SAFE_UPDATES = 0; -- Enabling deletion without WHERE clause (Use with caution)
DELETE FROM world_layoffs.layoffs_test2
WHERE row_num > 1;
SET SQL_SAFE_UPDATES = 1; -- Resetting SQL_SAFE_UPDATES to its default value

-- Verify that duplicates are removed
SELECT *
FROM world_layoffs.layoffs
WHERE row_num > 1;
	-- SQL Project - Data Cleaning

	-- Step 1: Selecting all data from the 'layoffs' table
	SELECT *
	FROM layoffs;

	-- Remove Duplicates, Standardize Data, Handle Null Values or Blanks, and Remove any Unnecessary Columns

	-- Creating a backup table for precautionary purposes
	CREATE TABLE layoffs_test_backup LIKE layoffs;

	-- Copying data from the 'layoffs' table to the backup table
	INSERT INTO layoffs_test_backup
	SELECT *
	FROM layoffs;

	-- -------------------------------------------------------------------
	-- Task 1: Removing Duplicates

	-- Checking for duplicate rows in the 'layoffs' table
	WITH duplicate_cte AS (
	SELECT *,
	ROW_NUMBER() OVER (
	PARTITION BY company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions
	ORDER BY company -- Ordering can be adjusted as needed
	) AS row_num
	FROM world_layoffs.layoffs_test
	)

	-- Selecting duplicate rows
	SELECT *
	FROM duplicate_cte
	WHERE row_num > 1;

	-- Example: Checking duplicates for a specific company (e.g., Casper)
	SELECT *
	FROM world_layoffs.layoffs_test
	WHERE company = 'Casper';

	-- Deleting duplicate rows from the 'layoffs' table
	SET SQL_SAFE_UPDATES = 0; -- Enabling deletion without WHERE clause (Use with caution)
	DELETE FROM world_layoffs.layoffs_test
	WHERE (company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions) IN (
	SELECT company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions
	FROM duplicate_cte
	WHERE row_num > 1
	);

	-- Resetting SQL_SAFE_UPDATES to its default value
	SET SQL_SAFE_UPDATES = 1;

	-- Verifying that duplicates are removed
	SELECT *
	FROM world_layoffs.layoffs_test
	WHERE row_num > 1;

	-- Creating a new table 'layoffs_test2' for further analysis
	CREATE TABLE `layoffs_test2` (
	`company` TEXT,
	`location` TEXT,
	`industry` TEXT,
	`total_laid_off` BIGINT DEFAULT NULL,
	`percentage_laid_off` TEXT,
	`date` TEXT,
	`stage` TEXT,
	`country` TEXT,
	`funds_raised_millions` INT DEFAULT NULL,
	`row_num` INT
	) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;

	-- Populating 'layoffs_test2' table with cleaned data
	INSERT INTO world_layoffs.layoffs_test2
	SELECT *,
	ROW_NUMBER() OVER(
	PARTITION BY company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions) AS row_num
	FROM world_layoffs.layoffs_test;

	-- Viewing cleaned data in 'layoffs_test2' table
	SELECT *
	FROM world_layoffs.layoffs_test2
	WHERE row_num > 1;

	-- Deleting unnecessary duplicate rows from 'layoffs_test2' table
	SET SQL_SAFE_UPDATES = 0; -- Enabling deletion without WHERE clause (Use with caution)
	DELETE FROM world_layoffs.layoffs_test2
	WHERE row_num > 1;
	SET SQL_SAFE_UPDATES = 1; -- Resetting SQL_SAFE_UPDATES to its default value

	-- Verify that duplicates are removed
	SELECT *
	FROM world_layoffs.layoffs
	WHERE row_num > 1;