Serena McDonnell serenamm

## twitter_insights.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                serenamm
                / twitter_insights.md
            
            
              Last active
              February 19, 2020 02:43
            
              
                Usefulness of Twitter data in making investment decisions
              
          
    Usefulness of Twitter data in making investment decisions

We had no part in generating the following charts. We are looking for your opinion on whether they can be useful in gathering information for trading purposes.
Part 1: Questions about Figures

Hashtag Frequency

Hashtags are used widely on Twitter. Here we look at yesterday's most popular hashtags among a subset of Twitter users.

  
## latex
$x=n^2$

## final_test.py
from mock import mock

create_table_query = '''
SELECT
    item_id_1,
    item_id_2
FROM (
    SELECT
        item_id_1,
        item_id_2,

## test_create_new_table.py
def test_create_new_table(mocker):

    # Mock all our variables
    mock_spark = mock.Mock()
    mock_category_q = mock.Mock()
    mock_created_table = mock.Mock()
    mock_created_table_coalesced = mock.Mock()
    # Calling spark.sql with create_table_query returns created_table - we need to mock it
    mock_spark.sql.side_effect = [mock_created_table]
    # Mock the output of calling .coalesce on created_table

## test_make_query.py
def test_make_query_true(mocker):

    # Create some fake table paths
    test_paths = {
        "product_table": {
            "table": "products",
        },
        "similarity_table": {
            "table": "product_similarity"
        }

## everything_together.py
import pyspark
from pyspark.sql import SparkSession

create_table_query = '''
SELECT
    item_id_1,
    item_id_2
FROM (
    SELECT
        item_id_1,

## create_table.py
def create_new_table(spark, table_paths, params, same_category_q):

    similarity_table = table_paths["product_similarity"]["table"]

    created_table = spark.sql(create_table_query.format(similarity_table=similarity_table,
                                                        same_category_q=same_category_q,
                                                        num_items=params["num_items"]))

    # Write table to some path
    created_table.coalesce(1).write.save(table_paths["created_table"]["path"],

## using_boolean.py
# Query template: selects both item ids and their similarity score from
# product_similarity (aliased as s). The {same_category_q} placeholder is
# filled in by str.format with an optional extra SQL fragment; an empty
# string is passed here, which leaves that line of the query blank.
'''
SELECT
  s.item_id_1,
  s.item_id_2,
  s.similarity_score
FROM product_similarity s
{same_category_q}
'''.format(same_category_q='') # Depends on value of same_category boolean

## slightly_more_complicated.py
SELECT
    s.item_id_1,
    s.item_id_2,
    s.similarity_score
FROM (
    SELECT
        s.item_id_1,
        s.item_id_2,
        s.similarity_score,
        ROW_NUMBER() OVER(PARTITION BY item_id_1 ORDER BY similarity_score DESC) as row_num

## inner_joins.py
SELECT
  s.item_id_1,
  s.item_id_2,
  s.similarity_score
FROM product_similarity s
INNER JOIN products p
ON s.item_id_1 = p.item_id
INNER JOIN products q
ON s.item_id_2 = q.item_id
WHERE s.item_id_1 != s.item_id_2
	from mock import mock

	create_table_query = '''
	SELECT
	item_id_1,
	item_id_2
	FROM (
	SELECT
	item_id_1,
	item_id_2,
	def test_create_new_table(mocker):

	# Mock all our variables
	mock_spark = mock.Mock()
	mock_category_q = mock.Mock()
	mock_created_table = mock.Mock()
	mock_created_table_coalesced = mock.Mock()
	# Calling spark.sql with create_table_query returns created_table - we need to mock it
	mock_spark.sql.side_effect = [mock_created_table]
	# Mock the output of calling .coalesce on created_table
	def test_make_query_true(mocker):

	# Create some fake table paths
	test_paths = {
	"product_table": {
	"table": "products",
	},
	"similarity_table": {
	"table": "product_similarity"
	}
	import pyspark
	from pyspark.sql import SparkSession

	create_table_query = '''
	SELECT
	item_id_1,
	item_id_2
	FROM (
	SELECT
	item_id_1,
	def create_new_table(spark, table_paths, params, same_category_q):

	similarity_table = table_paths["product_similarity"]["table"]

	created_table = spark.sql(create_table_query.format(similarity_table=similarity_table,
	same_category_q=same_category_q,
	num_items=params["num_items"]))

	# Write table to some path
	created_table.coalesce(1).write.save(table_paths["created_table"]["path"],
	'''
	SELECT
	s.item_id_1,
	s.item_id_2,
	s.similarity_score
	FROM product_similarity s
	{same_category_q}
	'''.format(same_category_q='') # Depends on value of same_category boolean