Masoud Saedi masoud-saedi

## globox_abtest_data_analysis_queries.sql
-- * Data Extraction and Analysis:

-- Q: What are the start and end dates of the experiment?
--    (taken as the earliest and the latest join_dt recorded in groups)
-- A: 2023-01-25 to 2023-02-06
SELECT
    MIN(join_dt),
    MAX(join_dt)
FROM groups;

-- Q: How many total users were in the experiment?
-- A: 48,943
SELECT COUNT(uid)

## portugal_crime_data_cleaning.py
import pandas as pd

# Read the raw data from the CSV file; its fields are separated by ';'.
df = pd.read_csv('crimesportugal.csv', sep=';')

# Define Column Names:
column_names = {
    'total': 'Total Crime',
    'vdom': 'Domestic Violence',
    'fur_veiculo': 'Vehicles Stolen',

## tavel_tide_cohort_sql_queries.sql
-- Cohort definition:
WITH cohort_users AS (
  SELECT user_id
  FROM sessions
  WHERE session_start > '2023-01-04'
  GROUP BY user_id
  HAVING COUNT(session_id) > 7
),

-- Used for calculating the distance between two airports in the final query:
	-- * Data Extraction and Analysis:

	-- Q: What are the start and end dates of the experiment?
	--    (taken as the earliest and the latest join_dt recorded in groups)
	-- A: 2023-01-25 to 2023-02-06
	SELECT
	    MIN(join_dt),
	    MAX(join_dt)
	FROM groups;

	-- Q: How many total users were in the experiment?
	-- A: 48,943
	SELECT COUNT(uid)
	import pandas as pd

	# Read the raw data from the CSV file; its fields are separated by ';'.
	df = pd.read_csv('crimesportugal.csv', sep=';')

	# Define Column Names:
	column_names = {
	'total': 'Total Crime',
	'vdom': 'Domestic Violence',
	'fur_veiculo': 'Vehicles Stolen',
	-- Cohort definition:
	WITH cohort_users AS (
	SELECT user_id
	FROM sessions
	WHERE session_start > '2023-01-04'
	GROUP BY user_id
	HAVING COUNT(session_id) > 7
	),

	-- Used for calculating the distance between two airports in the final query: