# Peeush Agarwal (peeush-the-developer)

## std_dev.py
# Sample standard deviation computed by hand:
# mean -> squared deviations -> sample variance -> square root.

# Data
a = [12, 54, 32, 100, 20]

# Method 1
## Calculate sample mean
mean = sum(a) / len(a)
## Calculate distance from sample mean and then square it
distance_from_mean_squared = [(i - mean) ** 2 for i in a]
## Calculate the sample variance (divide by n - 1, not n)
var = sum(distance_from_mean_squared) / (len(a) - 1)
## Calculate the sample standard deviation: the square root of the variance
std_dev = var ** 0.5

## variance.py
# Sample variance of a small data set, computed step by step.
a = [12, 54, 32, 100, 20]

# Method 1
## Sample mean: total of the observations divided by their count
mean = sum(a) / len(a)

## Squared deviation of every observation from the sample mean
distance_from_mean_squared = list(map(lambda x: (x - mean) ** 2, a))

## Sample variance: summed squared deviations divided by (n - 1)
var = sum(distance_from_mean_squared) / (len(a) - 1)

## iqr.py
# Interquartile range (IQR) of a small data set, using numpy percentiles.

# Import numpy library for percentile calculation
from numpy import percentile

# Data
a = [12, 54, 32, 100, 20]

# Calculate percentiles for 75th(Q3) and 25th(Q1)
q3, q1 = percentile(a, [75, 25])

# Calculate IQR = Q3 - Q1
iqr = q3 - q1

# Display output
print(iqr)

# Output: 34.0

## range.py
# Range of a data set: the gap between its largest and smallest values.

# Data
a = [12, 54, 32, 100, 20]

# Calculate range
# Stored as `value_range` rather than `range` so that the built-in range()
# is not shadowed for any code that runs after this snippet.
value_range = max(a) - min(a)

# Display output
print(value_range)

# Output: 88

## mode.py
# The statistics library provides mode() directly, so no manual counting
# is needed.
from statistics import mode

# Observations; 5.5 is the only value that occurs more than once
data = [5, 5.5, 5.5, 5.2, 5.6]

# Most frequent value in the data
mode_ = mode(data)

# Show the result
print(mode_)

## median_even_counts.py
# Median of a data set with an even number of values: the average of the two
# central values after sorting.
data = [5, 6, 3, 8, 4, 7]

# Step 1: Sort the values
data_sorted = sorted(data) # [3, 4, 5, 6, 7, 8]

# Step 2: Find the central values in the data
# The middle position is derived from the length so the snippet keeps working
# when `data` is replaced by another even-sized list.
upper_mid = len(data_sorted) // 2 # 3 for six values
central_values = data_sorted[upper_mid - 1:upper_mid + 1] # [5, 6]
median_ = sum(central_values) / 2

# Display median
print(median_)

# Output
# 5.5

## median_odd_counts.py
# Median of a data set with an odd number of values: the single central value
# after sorting.
data = [5, 6, 3, 4, 7]

# Step 1: Sort the values
data_sorted = sorted(data) # [3, 4, 5, 6, 7]

# Step 2: Find the central value in the data
# The index is derived from the length so the snippet keeps working when
# `data` is replaced by another odd-sized list.
middle_index = len(data_sorted) // 2 # 2 for five values, i.e. the 3rd item
median_ = data_sorted[middle_index]

# Display median
print(median_)

# Output
# 5

## mean_outliers.py
# The single large value (1000) pulls the mean up to 337.0, far above the
# other two values.
salaries_in_K = [1, 10, 1000]

# mean = (sum of values)/(total number of values)
total_salary = sum(salaries_in_K)
head_count = len(salaries_in_K)
mean = total_salary / head_count

# Show the result
print(mean)

# Output
# 337.0

## mean.py
# Arithmetic mean of a small list of numbers.
data = [3, 4, 5, 6, 7]

# mean = (sum of values)/(total number of values)
count = len(data)
mean = sum(data) / count

# Show the result
print(mean)

# Output
# 5.0

## create_dataframe.py
# Build a dataframe from `items`, labelling its four columns.
# NOTE(review): `pd` and `items` are not defined in this snippet; they must
# already be in scope (presumably `import pandas as pd` and the lies list of
# rows) - confirm against the surrounding code.
column_names = ['Date', 'Lie', 'Truth', 'Truth_Link']
df = pd.DataFrame(items, columns=column_names)

# Display top 5 rows from the dataframe (a bare expression is only echoed in
# an interactive session such as a notebook cell)
df.head()
	# Sample variance of a small data set, computed step by step.
	a = [12, 54, 32, 100, 20]

	# Method 1
	## Sample mean: total of the observations divided by their count
	mean = sum(a) / len(a)

	## Squared deviation of every observation from the sample mean
	distance_from_mean_squared = list(map(lambda x: (x - mean) ** 2, a))

	## Sample variance: summed squared deviations divided by (n - 1)
	var = sum(distance_from_mean_squared) / (len(a) - 1)
	# Interquartile range (IQR) of a small data set, using numpy percentiles.

	# Import numpy library for percentile calculation
	from numpy import percentile

	# Data
	a = [12, 54, 32, 100, 20]

	# Calculate percentiles for 75th(Q3) and 25th(Q1)
	q3, q1 = percentile(a, [75, 25])

	# Calculate IQR = Q3 - Q1
	iqr = q3 - q1

	# Display output
	print(iqr)

	# Output: 34.0
	# Range of a data set: the gap between its largest and smallest values.

	# Data
	a = [12, 54, 32, 100, 20]

	# Calculate range
	# Stored as `value_range` rather than `range` so that the built-in range()
	# is not shadowed for any code that runs after this snippet.
	value_range = max(a) - min(a)

	# Display output
	print(value_range)

	# Output: 88
	# The statistics library provides mode() directly, so no manual counting
	# is needed.
	from statistics import mode

	# Observations; 5.5 is the only value that occurs more than once
	data = [5, 5.5, 5.5, 5.2, 5.6]

	# Most frequent value in the data
	mode_ = mode(data)

	# Show the result
	print(mode_)
	# Median of a data set with an even number of values: the average of the
	# two central values after sorting.
	data = [5, 6, 3, 8, 4, 7]

	# Step 1: Sort the values
	data_sorted = sorted(data) # [3, 4, 5, 6, 7, 8]

	# Step 2: Find the central values in the data
	# The middle position is derived from the length so the snippet keeps
	# working when `data` is replaced by another even-sized list.
	upper_mid = len(data_sorted) // 2 # 3 for six values
	central_values = data_sorted[upper_mid - 1:upper_mid + 1] # [5, 6]
	median_ = sum(central_values) / 2

	# Display median
	print(median_)

	# Output
	# 5.5
	# Median of a data set with an odd number of values: the single central
	# value after sorting.
	data = [5, 6, 3, 4, 7]

	# Step 1: Sort the values
	data_sorted = sorted(data) # [3, 4, 5, 6, 7]

	# Step 2: Find the central value in the data
	# The index is derived from the length so the snippet keeps working when
	# `data` is replaced by another odd-sized list.
	middle_index = len(data_sorted) // 2 # 2 for five values, i.e. the 3rd item
	median_ = data_sorted[middle_index]

	# Display median
	print(median_)

	# Output
	# 5
	# The single large value (1000) pulls the mean up to 337.0, far above the
	# other two values.
	salaries_in_K = [1, 10, 1000]

	# mean = (sum of values)/(total number of values)
	total_salary = sum(salaries_in_K)
	head_count = len(salaries_in_K)
	mean = total_salary / head_count

	# Show the result
	print(mean)

	# Output
	# 337.0
	# Arithmetic mean of a small list of numbers.
	data = [3, 4, 5, 6, 7]

	# mean = (sum of values)/(total number of values)
	count = len(data)
	mean = sum(data) / count

	# Show the result
	print(mean)

	# Output
	# 5.0
	# Build a dataframe from `items`, labelling its four columns.
	# NOTE(review): `pd` and `items` are not defined in this snippet; they must
	# already be in scope (presumably `import pandas as pd` and the lies list
	# of rows) - confirm against the surrounding code.
	column_names = ['Date', 'Lie', 'Truth', 'Truth_Link']
	df = pd.DataFrame(items, columns=column_names)

	# Display top 5 rows from the dataframe (a bare expression is only echoed
	# in an interactive session such as a notebook cell)
	df.head()