Skip to content

Instantly share code, notes, and snippets.

Waldecir Faria wfaria

Block or report user

Report or block wfaria

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
@wfaria
wfaria / HeartbeatProducer.py
Last active Aug 23, 2018
Kafka Producer example for Medium article
View HeartbeatProducer.py
import time
from time import gmtime, strftime
from kafka import KafkaProducer
def publish_message(producer_instance, topic_name, key, value):
try:
key_bytes = bytes(key, encoding='utf-8')
value_bytes = bytes(value, encoding='utf-8')
producer_instance.send(topic_name, key=key_bytes, value=value_bytes)
producer_instance.flush()
@wfaria
wfaria / PySparkTest.py
Created Aug 23, 2018
PySpark test code
View PySparkTest.py
from pyspark import SparkContext

# Smoke test: count the lines of a local file through a Spark cluster.
dataFile = "./sbin/start-master.sh"
sc = SparkContext("spark://ip-XXX-XX-X-XX.sa-east-1.compute.internal:7077", "Simple App")
try:
    textRdd = sc.textFile(dataFile)
    # Each print() receives one pre-formatted string, so the output is the
    # same under Python 2 and Python 3 (the two spaces after the colon match
    # what the original comma-separated print statement emitted).
    print("Number of lines:  {}".format(textRdd.count()))
    print("Number of lines with 8080:  {}".format(
        textRdd.filter(lambda line: '8080' in line).count()))
finally:
    # Always stop the context, even if one of the RDD actions above raised.
    sc.stop()
@wfaria
wfaria / HeartbeatStreaming.py
Created Aug 23, 2018
Spark Streaming with Kafka example for a Medium article using Python
View HeartbeatStreaming.py
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
def main():
# Task configuration.
topic = "heartbeat"
brokerAddresses = "localhost:9092"
batchTime = 20
@wfaria
wfaria / get_sample_from_distribution.py
Created Mar 9, 2019
Election Poll Simulation - Getting a Sample of size 10
View get_sample_from_distribution.py
def get_sample_from_distribution(n):
"""
Creates an array of size n.
Each value will be 1 if the person would vote for Alice and
0 if they would vote for Bob.
"""
sample_opinions = []
for i in range(n):
sample_opinions.append(get_opinion_from_random_person())
@wfaria
wfaria / sample_histogram.py
Created Mar 9, 2019
Election Poll Simulation - Sample Histogram
View sample_histogram.py
# Draw a 2x3 grid of histograms, one per entry in `days`; each panel shows the
# distribution of sample means obtained from that many samples of size 100.
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
fig.subplots_adjust(hspace=0.4, wspace=0.3)
axes = axes.ravel()  # flatten the 2x3 grid so panels can be indexed 0..5
days = [5, 50, 500, 1000, 10000, 100000]
for i, samples_number in enumerate(days):
    sample_means = get_n_sample_means_from_distribution(
        samples_number=samples_number,
        sample_size=100)
    axes[i].hist(sample_means, bins=30)
@wfaria
wfaria / election_poll_sample_histogram2.py
Created Mar 9, 2019
Election Poll Simulation - Another Histogram with fixed sample number
View election_poll_sample_histogram2.py
# Draw a 2x3 grid of histograms, one per entry in `sample_size`; each panel
# shows the distribution of 100 sample means taken at that sample size.
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
fig.subplots_adjust(hspace=0.4, wspace=0.3)
axes = axes.ravel()  # flatten the 2x3 grid so panels can be indexed 0..5
# NOTE(review): the last entry (1e14) looks impractical if every sample is
# materialised element by element -- confirm the intended value.
sample_size = [10, 100, 10000, 1000000, 10000000, 100000000000000]
for i, size in enumerate(sample_size):
    sample_means = get_n_sample_means_from_distribution(
        samples_number=100,
        sample_size=size)
    axes[i].hist(sample_means, bins=30)
@wfaria
wfaria / get_margin_of_error_interval.py
Created Mar 10, 2019
Election Poll - margin of error method
View get_margin_of_error_interval.py
def get_margin_of_error_interval(sample_size, number_of_std_dvt):
    """Estimate the mean and margin of error from one freshly drawn sample.

    A single sample of ``sample_size`` observations is requested from
    ``get_n_samples_from_distribution``. Its standard deviation (``np.std``)
    divided by ``sqrt(sample_size)`` gives the estimated standard deviation
    of the mean, which is then scaled by ``number_of_std_dvt``.

    Returns a dict with two keys: ``"error"`` (the margin of error) and
    ``"mean"`` (the sample mean).
    """
    observations = get_n_samples_from_distribution(
        samples_number=1,
        sample_size=sample_size,
    )
    standard_error = np.std(observations) / math.sqrt(sample_size)
    return {
        "error": number_of_std_dvt * standard_error,
        "mean": np.mean(observations),
    }
@wfaria
wfaria / get_n_samples_from_distribution.py
Last active Mar 10, 2019
Election Poll Simulation - Getting multiple samples
View get_n_samples_from_distribution.py
def get_n_samples_from_distribution(samples_number, sample_size):
    """
    Draw `samples_number` independent samples from the target
    'unknown distribution', each requested with size `sample_size`.

    Returns a list holding one `get_sample_from_distribution(sample_size)`
    result per sample, in the order they were drawn.
    """
    return [
        get_sample_from_distribution(sample_size)
        for _ in range(samples_number)
    ]
def get_n_sample_means_from_distribution(samples_number, sample_size):
samples = get_n_samples_from_distribution(
View margin_of_error_comp.py
x = []
y = []
y1 = []
y2 = []
fig, axes = plt.subplots(1, 2, figsize = (10, 6))
axes = axes.ravel()
for power_of_ten in range(1, 8):
x.append(power_of_ten)
y.append(get_margin_of_error_interval(10 ** power_of_ten, 2))
View final_election_poll_check.py
# Final poll check: one sample of size 10000, with the margin of error taken
# as 2 estimated standard deviations.
get_margin_of_error_interval(sample_size = 10000, number_of_std_dvt = 2)
You can’t perform that action at this time.