Bjørn Hansen bh1995

## gist:c9742e7dd58c8ffe5d0316f67462bdeb
// Sample card numbers, each stored as an array of single decimal digits.
// NOTE(review): "valid"/"invalid" presumably refers to the Luhn checksum —
// confirm against the validator these fixtures are fed to.

// All valid credit card numbers
const valid1 = [4, 5, 3, 9, 6, 7, 7, 9, 0, 8, 0, 1, 6, 8, 0, 8];
const valid2 = [5, 5, 3, 5, 7, 6, 6, 7, 6, 8, 7, 5, 1, 4, 3, 9];
const valid3 = [3, 7, 1, 6, 1, 2, 0, 1, 9, 9, 8, 5, 2, 3, 6]; // 15 digits; the other samples have 16
const valid4 = [6, 0, 1, 1, 1, 4, 4, 3, 4, 0, 6, 8, 2, 9, 0, 5];
const valid5 = [4, 5, 3, 9, 4, 0, 4, 9, 6, 7, 8, 6, 9, 6, 6, 6];

// All invalid credit card numbers
const invalid1 = [4, 5, 3, 2, 7, 7, 8, 7, 7, 1, 0, 9, 1, 7, 9, 5];
const invalid2 = [5, 7, 9, 5, 5, 9, 3, 3, 9, 2, 1, 3, 4, 6, 4, 3];

## index.html
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>CSS Cheat Sheet</title>
    <link rel="stylesheet" href="styles.css" />
  </head>
  <body>
    <header>

## Loading ECG data from .h5 format
import h5py
# load af data
# Read the whole 'af_tot' dataset into memory. The `with` block closes the
# file handle even if the read raises.
path = 'af_save_path' # for ex. '/content/drive/MyDrive/ecg_data_full/af_full.h5'
with h5py.File(path, 'r') as h5f:
    af_array = h5f['af_tot'][:]
# load normal data
# Same pattern for the 'normal_tot' dataset; the handle is closed on exit
# instead of being left open.
path = 'normal_save_path' # for ex. '/content/drive/MyDrive/ecg_data_full/normal_full.h5'
with h5py.File(path, 'r') as h5f:
    normal_array = h5f['normal_tot'][:]

## word_score
import re
# Size of the review sample to work with.
N_total = 32_261_216
# Lower bound on word occurrences: a word must occur in at least
# 0.5 percent of the number of reviews.
N_limit = N_total * 0.005
# Score cut-off; how it is applied is not visible in this snippet.
score_threshold = 90

comments_cleaned = reviews_with_score_df\
    .select(['comments', 'mean_score'])\
    .limit(N_total)\

## filtering words
# Filter all reviews
# Project the listings down to the id and review-score columns and cache the result.
listings_score_only_df = listings_df.select(['id', 'review_scores_rating']).cache()
# Cast 'id' to an integer column, then drop rows whose 'id' is null afterwards.
# (presumably values that cannot be cast become null — confirm Spark cast semantics)
listings_score_only_df = listings_score_only_df.withColumn('id',
                                                           listings_score_only_df['id'].cast(IntegerType()))
listings_score_only_df = listings_score_only_df.filter(col('id').isNotNull())
# Same cast-then-drop-nulls treatment for 'review_scores_rating'.
listings_score_only_df = listings_score_only_df.withColumn('review_scores_rating',
                                                           listings_score_only_df['review_scores_rating'].cast(IntegerType()))
listings_score_only_df = listings_score_only_df.filter(col('review_scores_rating').isNotNull())

# NOTE(review): this repeats the initial select and rebinds the name, so the casts
# and null filters above no longer apply to `listings_score_only_df` from here on —
# confirm this is intended.
listings_score_only_df = listings_df.select(['id', 'review_scores_rating']).cache()

## spark_session
# Spark setup: configuration first, then the context, then the session.
conf = SparkConf()
conf.set("spark.ui.port", "4050")  # serve the Spark web UI on port 4050
sc = pyspark.SparkContext(conf=conf)
# Obtain the session (getOrCreate returns an existing one if present).
spark = SparkSession.builder.getOrCreate()

## seasonal_trend
# Count the rows of `beijing_popularity` per calendar month of their 'date'
# column, ordered by month number.
month_of_review = f.month(beijing_popularity["date"]).alias("month")
beijing_popularity_month = (
    beijing_popularity
    .groupBy(month_of_review)
    .count()
    .sort("month", ascending=True)
)
beijing_popularity_month_pd = beijing_popularity_month.toPandas()

# Bar positions: one bar every 4 units, so width-3 bars leave a gap of 1.
num = len(beijing_popularity_month_pd) * 4
c = list(range(0, num, 4))

plt.figure(figsize=(10, 4))
plt.bar(
    c,
    beijing_popularity_month_pd['count'],
    width=3,
    align='center',
    alpha=0.5,
)
# Label each bar with its month number.
plt.xticks(c, beijing_popularity_month_pd["month"])
plt.xlabel('Month')
plt.ylabel('Popularity')
plt.title('The Popularity of Beijing by month')

## beijing_popularity
# Query: one row per review whose listing is in Beijing, carrying the review date
# and the listing's city / neighbourhood, ordered by date.
# (assumes `reviews_df` and `listings_df2` are registered as SQL views — confirm)
# NOTE(review): the WHERE clause filters on b.city, so reviews without a matching
# listing are discarded and the LEFT JOIN behaves like an inner join here.
beijing_popularity_sql = """SELECT a.date, b.city,b.neighbourhood_cleansed
FROM reviews_df a
LEFT JOIN listings_df2 b
on a.listing_id = b.id
WHERE b.city='Beijing'
ORDER BY date ASC;
"""
# Run the query and cache the resulting DataFrame, then pull it into pandas.
beijing_popularity = spark.sql(beijing_popularity_sql).cache()
beijing_popularity_pd = beijing_popularity.toPandas()
# plot

## beijing_listing_price_filter
# Histogram of Beijing listing prices restricted to listings priced below `lowPrice`.
lowPrice = 2000
# Store the filtered frame under its own name: the histogram below reads
# `beijing_price_low_pd`, and the unfiltered `beijing_price_pd` must not be
# overwritten by the filtered subset.
beijing_price_low_pd = beijing_price.filter(beijing_price["price"] < lowPrice).toPandas()
# plot
plt.figure(figsize=[16, 7])  # set figure size
plt.hist(
    beijing_price_low_pd['price'],
    bins=50,
    alpha=0.5,
    color='red',
    edgecolor='white',
    linewidth=1.2,
)
plt.xlabel('price', size=15)
plt.ylabel('count', size=15)
plt.title('Histogram of Bei Jing price distribution of below 2000$ price', size=15)
plt.show()

## beijing_listing_price
# Query the `price1` column (exposed as `price`) of every listing whose city is Beijing.
beijing_price_sql = """SELECT price1 as price
FROM listings_df2
WHERE city = 'Beijing'
"""
# Run the query and cache the result, then pull it into pandas for plotting.
beijing_price = spark.sql(beijing_price_sql).cache()
beijing_price_pd = beijing_price.toPandas()

# plot: 50-bin histogram of the price column
plt.figure(figsize=[16, 7])  # set figure size
plt.hist(
    beijing_price_pd['price'],
    bins=50,
    alpha=0.5,
    color='red',
    edgecolor='white',
    linewidth=1.2,
)
plt.xlabel('price', size=15)
	// Sample card numbers, each stored as an array of single decimal digits.
	// NOTE(review): "valid"/"invalid" presumably refers to the Luhn checksum —
	// confirm against the validator these fixtures are fed to.

	// All valid credit card numbers
	const valid1 = [4, 5, 3, 9, 6, 7, 7, 9, 0, 8, 0, 1, 6, 8, 0, 8];
	const valid2 = [5, 5, 3, 5, 7, 6, 6, 7, 6, 8, 7, 5, 1, 4, 3, 9];
	const valid3 = [3, 7, 1, 6, 1, 2, 0, 1, 9, 9, 8, 5, 2, 3, 6]; // 15 digits; the other samples have 16
	const valid4 = [6, 0, 1, 1, 1, 4, 4, 3, 4, 0, 6, 8, 2, 9, 0, 5];
	const valid5 = [4, 5, 3, 9, 4, 0, 4, 9, 6, 7, 8, 6, 9, 6, 6, 6];

	// All invalid credit card numbers
	const invalid1 = [4, 5, 3, 2, 7, 7, 8, 7, 7, 1, 0, 9, 1, 7, 9, 5];
	const invalid2 = [5, 7, 9, 5, 5, 9, 3, 3, 9, 2, 1, 3, 4, 6, 4, 3];
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8" />
	<meta name="viewport" content="width=device-width, initial-scale=1.0" />
	<title>CSS Cheat Sheet</title>
	<link rel="stylesheet" href="styles.css" />
	</head>
	<body>
	<header>
	import h5py
	# load af data
	# Read the whole 'af_tot' dataset into memory. The `with` block closes the
	# file handle even if the read raises.
	path = 'af_save_path' # for ex. '/content/drive/MyDrive/ecg_data_full/af_full.h5'
	with h5py.File(path, 'r') as h5f:
		af_array = h5f['af_tot'][:]
	# load normal data
	# Same pattern for the 'normal_tot' dataset; the handle is closed on exit
	# instead of being left open.
	path = 'normal_save_path' # for ex. '/content/drive/MyDrive/ecg_data_full/normal_full.h5'
	with h5py.File(path, 'r') as h5f:
		normal_array = h5f['normal_tot'][:]
	import re
	# Size of the review sample to work with.
	N_total = 32_261_216
	# Lower bound on word occurrences: a word must occur in at least
	# 0.5 percent of the number of reviews.
	N_limit = N_total * 0.005
	# Score cut-off; how it is applied is not visible in this snippet.
	score_threshold = 90

	comments_cleaned = reviews_with_score_df\
	.select(['comments', 'mean_score'])\
	.limit(N_total)\
	# Filter all reviews
	# Project the listings down to the id and review-score columns and cache the result.
	listings_score_only_df = listings_df.select(['id', 'review_scores_rating']).cache()
	# Cast 'id' to an integer column, then drop rows whose 'id' is null afterwards.
	# (presumably values that cannot be cast become null — confirm Spark cast semantics)
	listings_score_only_df = listings_score_only_df.withColumn('id',
	listings_score_only_df['id'].cast(IntegerType()))
	listings_score_only_df = listings_score_only_df.filter(col('id').isNotNull())
	# Same cast-then-drop-nulls treatment for 'review_scores_rating'.
	listings_score_only_df = listings_score_only_df.withColumn('review_scores_rating',
	listings_score_only_df['review_scores_rating'].cast(IntegerType()))
	listings_score_only_df = listings_score_only_df.filter(col('review_scores_rating').isNotNull())

	# NOTE(review): this repeats the initial select and rebinds the name, so the casts
	# and null filters above no longer apply to `listings_score_only_df` from here on —
	# confirm this is intended.
	listings_score_only_df = listings_df.select(['id', 'review_scores_rating']).cache()
	# Spark setup: configuration first, then the context, then the session.
	conf = SparkConf()
	conf.set("spark.ui.port", "4050")  # serve the Spark web UI on port 4050
	sc = pyspark.SparkContext(conf=conf)
	# Obtain the session (getOrCreate returns an existing one if present).
	spark = SparkSession.builder.getOrCreate()
	# Count the rows of `beijing_popularity` per calendar month of their 'date'
	# column, ordered by month number.
	month_of_review = f.month(beijing_popularity["date"]).alias("month")
	beijing_popularity_month = (
		beijing_popularity
		.groupBy(month_of_review)
		.count()
		.sort("month", ascending=True)
	)
	beijing_popularity_month_pd = beijing_popularity_month.toPandas()

	# Bar positions: one bar every 4 units, so width-3 bars leave a gap of 1.
	num = len(beijing_popularity_month_pd) * 4
	c = list(range(0, num, 4))

	plt.figure(figsize=(10, 4))
	plt.bar(
		c,
		beijing_popularity_month_pd['count'],
		width=3,
		align='center',
		alpha=0.5,
	)
	# Label each bar with its month number.
	plt.xticks(c, beijing_popularity_month_pd["month"])
	plt.xlabel('Month')
	plt.ylabel('Popularity')
	plt.title('The Popularity of Beijing by month')
	# Query: one row per review whose listing is in Beijing, carrying the review date
	# and the listing's city / neighbourhood, ordered by date.
	# (assumes `reviews_df` and `listings_df2` are registered as SQL views — confirm)
	# NOTE(review): the WHERE clause filters on b.city, so reviews without a matching
	# listing are discarded and the LEFT JOIN behaves like an inner join here.
	beijing_popularity_sql = """SELECT a.date, b.city,b.neighbourhood_cleansed
	FROM reviews_df a
	LEFT JOIN listings_df2 b
	on a.listing_id = b.id
	WHERE b.city='Beijing'
	ORDER BY date ASC;
	"""
	# Run the query and cache the resulting DataFrame, then pull it into pandas.
	beijing_popularity = spark.sql(beijing_popularity_sql).cache()
	beijing_popularity_pd = beijing_popularity.toPandas()
	# plot
	# Histogram of Beijing listing prices restricted to listings priced below `lowPrice`.
	lowPrice = 2000
	# Store the filtered frame under its own name: the histogram below reads
	# `beijing_price_low_pd`, and the unfiltered `beijing_price_pd` must not be
	# overwritten by the filtered subset.
	beijing_price_low_pd = beijing_price.filter(beijing_price["price"] < lowPrice).toPandas()
	# plot
	plt.figure(figsize=[16, 7])  # set figure size
	plt.hist(
		beijing_price_low_pd['price'],
		bins=50,
		alpha=0.5,
		color='red',
		edgecolor='white',
		linewidth=1.2,
	)
	plt.xlabel('price', size=15)
	plt.ylabel('count', size=15)
	plt.title('Histogram of Bei Jing price distribution of below 2000$ price', size=15)
	plt.show()
	# Query the `price1` column (exposed as `price`) of every listing whose city is Beijing.
	beijing_price_sql = """SELECT price1 as price
	FROM listings_df2
	WHERE city = 'Beijing'
	"""
	# Run the query and cache the result, then pull it into pandas for plotting.
	beijing_price = spark.sql(beijing_price_sql).cache()
	beijing_price_pd = beijing_price.toPandas()

	# plot: 50-bin histogram of the price column
	plt.figure(figsize=[16, 7])  # set figure size
	plt.hist(
		beijing_price_pd['price'],
		bins=50,
		alpha=0.5,
		color='red',
		edgecolor='white',
		linewidth=1.2,
	)
	plt.xlabel('price', size=15)