Created
September 27, 2016 21:16
-
-
Save ramnov/2ae11fb3dda0e2ab1e19d736fccd4f26 to your computer and use it in GitHub Desktop.
Clean the Loan Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--DROP unnecessary column desc | |
ALTER TABLE [dbo].[LoanStats] DROP COLUMN [desc] | |
--Remove % from int_rate and convert its type to float | |
UPDATE [dbo].[LoanStats] SET [int_rate] = REPLACE([int_rate], '%', '') | |
ALTER TABLE [dbo].[LoanStats] ALTER COLUMN [int_rate] float | |
--Remove % from revol_util and convert its type to float | |
UPDATE [dbo].[LoanStats] SET [revol_util] = REPLACE([revol_util], '%', '') | |
ALTER TABLE [dbo].[LoanStats] ALTER COLUMN [revol_util] float | |
--Remove rows where loan_status is empty | |
DELETE FROM [dbo].[LoanStats] where [loan_status] IS NULL | |
--Classify all loans as good/bad based on its status and store it in a column named “is_bad” | |
ALTER TABLE [dbo].[LoanStats] ADD [is_bad] int | |
UPDATE [dbo].[LoanStats] | |
SET [is_bad] = (CASE WHEN loan_status IN ('Late (16-30 days)', 'Late (31-120 days)', 'Default', 'Charged Off') THEN 1 ELSE 0 END) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment