Skip to content

Instantly share code, notes, and snippets.

@baditaflorin
Created December 23, 2016 22:26
Show Gist options
  • Save baditaflorin/b3d272c4bda99110b784ba2ae7bc55bb to your computer and use it in GitHub Desktop.
Save baditaflorin/b3d272c4bda99110b784ba2ae7bc55bb to your computer and use it in GitHub Desktop.
select * from
(select
--Extract all info from post_stats
ps.*,
-- Calculate the percentage for num_loves,num_wows,num_hahas,num_sads,num_angries
(ps.num_loves*100)/nullif((ps.num_reactions - ps.num_likes),0) perc_loves,
(ps.num_wows*100)/nullif((ps.num_reactions - ps.num_likes),0) perc_wows,
(ps.num_hahas*100)/nullif((ps.num_reactions - ps.num_likes),0) perc_hahas,
(ps.num_sads*100)/nullif((ps.num_reactions - ps.num_likes),0) perc_sads,
(ps.num_angries*100)/nullif((ps.num_reactions - ps.num_likes),0) perc_angries,
-- Calculat the percetange of shares for the posts, from the avg number of shares for that page.
(ps.num_shares*100)/nullif((avg_shares_page),0) perc_shares_page,
-- Post informations from fb_posts f
f.page_name,
f.post_type,
f.post_message,
f.post_link,
-- remove the brakets that are corrupting the data
regexp_replace(p.link, '[{""}]','','g') link,
regexp_replace(p.category, '[{""}]','','g') category,
(CAST(regexp_replace(p.fan_count, '[{}]', '', 'g')AS decimal)) fan_count,
(CAST (g.score AS decimal)) score,
(CAST (g.magnitude AS float8)) magnitude
from fb_posts f
-- Add the data collected from the modules and join them to the original dataset.
-- This allow us to get the Sentiment Analysis data and show the result.
inner join mod_fb_posts_google_sentiment g on g.post_id = f.post_id
-- This allow us to get the FB page informations, fan_count, categories of the page.
inner join mod_fb_to_page_stats p on (CAST (p.page_id AS decimal) = f.page_id)
-- Add add all of the informations from post_stats ( we are filtering using the MAX(data_scraped) in the where clause to get only the latest version for each post.
inner join (select * from post_stats ) ps on ps.post_id = f.post_id
-- Get the average number of shares per each post, compared to the average of the total shares divided by the number of posts.
INNER JOIN (SELECT round(avg(num_shares)) avg_shares_page,page_name fb_page_name from fb_posts group by page_name) avg_f on avg_f.fb_page_name = f.page_name
-- We remove posts that have no text message.
where f.post_name is not null
and
-- each post is recorded every hour, so for each post we have over 100 values, using this next line, we select just the latest posts from post_stats
ps. data_scraped = (SELECT MAX(data_scraped) FROM post_stats max_date WHERE f.post_id = max_date.post_id)
--limit 5
) subquery
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment