Skip to content

Instantly share code, notes, and snippets.

Waldecir Faria wfaria

Block or report user

Report or block wfaria

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View final_election_poll_check.py
# Example run: poll 10,000 simulated people and report the mean with a
# 2-standard-error (~95% confidence) margin of error.
get_margin_of_error_interval(sample_size = 10000, number_of_std_dvt = 2)
View margin_of_error_comp.py
# Accumulators for plotting margin of error against sample size.
x = []
y = []
y1 = []  # NOTE(review): unused in the visible snippet — gist preview looks truncated; verify against the full gist.
y2 = []
fig, axes = plt.subplots(1, 2, figsize = (10, 6))
axes = axes.ravel()
# Evaluate sample sizes 10^1 .. 10^7 (log-spaced), 2 standard errors each.
for power_of_ten in range(1, 8):
    x.append(power_of_ten)
    y.append(get_margin_of_error_interval(10 ** power_of_ten, 2))
@wfaria
wfaria / get_margin_of_error_interval.py
Created Mar 10, 2019
Election Poll - margin of error method
View get_margin_of_error_interval.py
def get_margin_of_error_interval(sample_size, number_of_std_dvt):
    """Estimate the poll result and its margin of error from one sample.

    Draws a single sample of ``sample_size`` simulated voters, then returns
    a dict with the sample mean and a margin of error equal to
    ``number_of_std_dvt`` standard errors of the mean.
    """
    # One simulated polling day: a single sample of the requested size.
    polled = get_n_samples_from_distribution(
        samples_number = 1,
        sample_size = sample_size)
    # Standard error of the mean: s / sqrt(n).
    standard_error = np.std(polled) / math.sqrt(sample_size)
    return {
        "error": number_of_std_dvt * standard_error,
        "mean": np.mean(polled),
    }
@wfaria
wfaria / election_poll_sample_histogram2.py
Created Mar 9, 2019
Election Poll Simulation - Another Histogram with fixed sample number
View election_poll_sample_histogram2.py
# Histograms of 100 sample means for growing sample sizes: illustrates how
# the sampling distribution of the mean narrows as sample size increases.
fig, axes = plt.subplots(2, 3, figsize = (12, 8))
fig.subplots_adjust(hspace=0.4, wspace=0.3)
axes = axes.ravel()
# NOTE(review): the last entry (1e14) would exhaust memory with a
# pure-Python per-person sampler — confirm it is intentional.
sample_size = [10, 100, 10000, 1000000, 10000000, 100000000000000]
# enumerate pairs each subplot index with its sample size (idiomatic,
# replaces range(len(...)) indexing).
for i, size in enumerate(sample_size):
    sample_means = get_n_sample_means_from_distribution(
        samples_number = 100,
        sample_size = size)
    axes[i].hist(sample_means, bins=30)
@wfaria
wfaria / sample_histogram.py
Created Mar 9, 2019
Election Poll Simulation - Sample Histogram
View sample_histogram.py
# Histograms of sample means for a fixed sample size (100 people) while the
# number of samples ("days" of polling) grows: the bell shape emerges as the
# number of sample means increases.
fig, axes = plt.subplots(2, 3, figsize = (12, 8))
fig.subplots_adjust(hspace=0.4, wspace=0.3)
axes = axes.ravel()
days = [5, 50, 500, 1000, 10000, 100000]
# enumerate pairs each subplot index with its day count (idiomatic,
# replaces range(len(...)) indexing).
for i, day_count in enumerate(days):
    sample_means = get_n_sample_means_from_distribution(
        samples_number = day_count,
        sample_size = 100)
    axes[i].hist(sample_means, bins=30)
@wfaria
wfaria / get_n_samples_from_distribution.py
Last active Mar 10, 2019
Election Poll Simulation - Getting multiple samples
View get_n_samples_from_distribution.py
def get_n_samples_from_distribution(samples_number, sample_size):
    """
    Draw ``samples_number`` independent samples, each containing
    ``sample_size`` voter opinions, from our target 'unknown distribution'.
    """
    # One call to the single-sample helper per requested sample.
    return [get_sample_from_distribution(sample_size)
            for _ in range(samples_number)]
def get_n_sample_means_from_distribution(samples_number, sample_size):
samples = get_n_samples_from_distribution(
@wfaria
wfaria / get_sample_from_distribution.py
Created Mar 9, 2019
Election Poll Simulation - Getting a Sample of size 10
View get_sample_from_distribution.py
def get_sample_from_distribution(n):
    """
    Creates an array of size n.
    Each value will be 1 if some person would vote on Alice and
    0 if he would vote on Bob.
    """
    # Ask n simulated people for their vote, one at a time.
    sample_opinions = []
    for i in range(n):
        sample_opinions.append(get_opinion_from_random_person())
    # NOTE(review): gist preview appears truncated here — no return statement
    # is visible; presumably `return sample_opinions` follows. Verify against
    # the full gist.
@wfaria
wfaria / get_opinion_from_random_person.py
Last active Mar 10, 2019
Election Poll Simulation - Asking opinion from a single person.
View get_opinion_from_random_person.py
import numpy as np
import matplotlib.pyplot as plt
import math
def get_opinion_from_random_person():
    """Simulate one random voter's answer: 1 for Alice, 0 for Bob."""
    # In a real poll this probability is unknown; it is fixed here only so
    # the simulation has a known ground truth to compare estimates against.
    alice_win_probability = 0.53
    # A single Bernoulli draw (binomial with n=1).
    return np.random.binomial(n=1, p=alice_win_probability)
@wfaria
wfaria / HeartbeatStreaming.py
Created Aug 23, 2018
Spark Streaming with Kafka example for a Medium article using Python
View HeartbeatStreaming.py
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
def main():
    """Set up Spark Streaming consumption of a Kafka heartbeat topic."""
    # Task configuration.
    # Kafka topic this job subscribes to.
    topic = "heartbeat"
    # Kafka broker list (single local broker here).
    brokerAddresses = "localhost:9092"
    # Micro-batch interval for Spark Streaming — presumably seconds; verify
    # against the StreamingContext call in the full gist.
    batchTime = 20
    # NOTE(review): gist preview truncated — the streaming-context setup that
    # presumably follows is not visible here.
@wfaria
wfaria / PySparkTest.py
Created Aug 23, 2018
PySpark test code
View PySparkTest.py
from pyspark import SparkContext

# Smoke test: count lines in a local text file through a Spark cluster.
# Any readable file works as input; here a Spark start script is used.
dataFile = "./sbin/start-master.sh"
# Connect to the standalone cluster master (host redacted with X's).
sc = SparkContext("spark://ip-XXX-XX-X-XX.sa-east-1.compute.internal:7077", "Simple App")
textRdd = sc.textFile(dataFile)
# Python 2 print statements — this script targets Python 2.
print "Number of lines: ", textRdd.count()
print "Number of lines with 8080: ", textRdd.filter(lambda x : '8080' in x).count()
# Release cluster resources.
sc.stop()
You can’t perform that action at this time.