Created
December 23, 2016 22:26
-
-
Save baditaflorin/b3d272c4bda99110b784ba2ae7bc55bb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
select * from | |
(select | |
--Extract all info from post_stats | |
ps.*, | |
-- Calculate the percentage for num_loves,num_wows,num_hahas,num_sads,num_angries | |
(ps.num_loves*100)/nullif((ps.num_reactions - ps.num_likes),0) perc_loves, | |
(ps.num_wows*100)/nullif((ps.num_reactions - ps.num_likes),0) perc_wows, | |
(ps.num_hahas*100)/nullif((ps.num_reactions - ps.num_likes),0) perc_hahas, | |
(ps.num_sads*100)/nullif((ps.num_reactions - ps.num_likes),0) perc_sads, | |
(ps.num_angries*100)/nullif((ps.num_reactions - ps.num_likes),0) perc_angries, | |
-- Calculat the percetange of shares for the posts, from the avg number of shares for that page. | |
(ps.num_shares*100)/nullif((avg_shares_page),0) perc_shares_page, | |
-- Post informations from fb_posts f | |
f.page_name, | |
f.post_type, | |
f.post_message, | |
f.post_link, | |
-- remove the brakets that are corrupting the data | |
regexp_replace(p.link, '[{""}]','','g') link, | |
regexp_replace(p.category, '[{""}]','','g') category, | |
(CAST(regexp_replace(p.fan_count, '[{}]', '', 'g')AS decimal)) fan_count, | |
(CAST (g.score AS decimal)) score, | |
(CAST (g.magnitude AS float8)) magnitude | |
from fb_posts f | |
-- Add the data collected from the modules and join them to the original dataset. | |
-- This allow us to get the Sentiment Analysis data and show the result. | |
inner join mod_fb_posts_google_sentiment g on g.post_id = f.post_id | |
-- This allow us to get the FB page informations, fan_count, categories of the page. | |
inner join mod_fb_to_page_stats p on (CAST (p.page_id AS decimal) = f.page_id) | |
-- Add add all of the informations from post_stats ( we are filtering using the MAX(data_scraped) in the where clause to get only the latest version for each post. | |
inner join (select * from post_stats ) ps on ps.post_id = f.post_id | |
-- Get the average number of shares per each post, compared to the average of the total shares divided by the number of posts. | |
INNER JOIN (SELECT round(avg(num_shares)) avg_shares_page,page_name fb_page_name from fb_posts group by page_name) avg_f on avg_f.fb_page_name = f.page_name | |
-- We remove posts that have no text message. | |
where f.post_name is not null | |
and | |
-- each post is recorded every hour, so for each post we have over 100 values, using this next line, we select just the latest posts from post_stats | |
ps. data_scraped = (SELECT MAX(data_scraped) FROM post_stats max_date WHERE f.post_id = max_date.post_id) | |
--limit 5 | |
) subquery |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment