# Created August 11, 2022 09:55
# Gist: natyrix/07914dfaae2cb67d13f814fa5659d5c0
# NOTE: GitHub flagged this file as containing bidirectional Unicode text that
# may be interpreted or compiled differently than what appears below. To
# review, open the file in an editor that reveals hidden Unicode characters.
import os
import sys
import unittest

import pandas as pd

# Put "../Twitter-Data-Analysis/" (made absolute against the current working
# directory) on the import path. This must run before the project imports
# below; presumably that directory is where extract_dataframe lives.
sys.path.append(os.path.abspath(os.path.join("../Twitter-Data-Analysis/")))

from extract_dataframe import read_json  # noqa: E402
from extract_dataframe import TweetDfExtractor  # noqa: E402

# The data reading/processing tests need a small fixture: about 5 sample
# tweets (no more than 10) stored in a JSON file. Point the path below at
# wherever that file was placed, e.g. ./sampletweets.json.
sampletweetsjsonfile = "./tests/sample_global.json"

# read_json returns a pair; only its second element is kept as the fixture.
_, tweet_list = read_json(sampletweetsjsonfile)
# Column names for the tweet dataframe, in order. The list is not referenced
# by any test visible in this file; it is kept for code that may use it.
columns = [
    "created_at",
    "source",
    "original_text",
    "clean_text",
    "sentiment",
    "polarity",
    "subjectivity",
    "lang",
    "favorite_count",
    "retweet_count",
    "original_author",
    "screen_count",
    "followers_count",
    "friends_count",
    "possibly_sensitive",
    "hashtags",
    "user_mentions",
    "place",
    "place_coord_boundaries",
]
class TestTweetDfExtractor(unittest.TestCase): | |
""" | |
A class for unit-testing function in the fix_clean_tweets_dataframe.py file | |
Args: | |
----- | |
unittest.TestCase this allows the new class to inherit | |
from the unittest module | |
""" | |
def setUp(self) -> pd.DataFrame: | |
self.df = TweetDfExtractor(tweet_list[:5]) | |
# tweet_df = self.df.get_tweet_df() | |
def test_find_statuses_count(self): | |
self.assertEqual( | |
self.df.find_statuses_count(), [8097, 5831, 1627, 1627, 18958] | |
) | |
def test_find_full_text(self): | |
text = [ | |
"RT @i_ameztoy Extra random image I Lets focus in one very specific zone of the western coast gt Longjing District Taichung #City #Ta", | |
"RT @IndoPac_Info #Chinas media explains the military reasons for each area of the drills in the #Taiwan Strait Read the labels in the pi", | |
"China even cut off communication they dont anwer phonecalls from the US But here clown @ZelenskyyUa enters the stage to ask #XiJinping to change Putins mind", | |
"Putin to #XiJinping I told you my friend Taiwan will be a vassal state including nukes much like the Ukrainian model I warned you But it took Pelosi to open Chinas eyes", | |
"RT @ChinaUncensored I’m sorry I thought Taiwan was an independent country because it had its own government currency military travel d" | |
] | |
self.assertEqual(self.df.find_clean_text(), text) | |
def test_find_sentiments(self): | |
self.assertEqual( | |
self.df.find_sentiments(self.df.find_full_text()), | |
( | |
[-0.125, -0.1, 0.0, 0.1, -6.938893903907228e-18], | |
[0.190625, 0.1, 0.0, 0.35, 0.55625], | |
['negative', 'negative', 'neutral', 'positive', 'negative'] | |
), | |
) | |
def test_find_screen_name(self): | |
name = ['i_ameztoy', 'ZIisq', 'Fin21Free', | |
'Fin21Free', 'VizziniDolores'] | |
self.assertEqual(self.df.find_screen_name(), name) | |
def test_find_followers_count(self): | |
f_count = [20497, 65, 85, 85, 910] | |
self.assertEqual(self.df.find_followers_count(), f_count) | |
def test_find_friends_count(self): | |
friends_count = [2621, 272, 392, 392, 2608] | |
self.assertEqual(self.df.find_friends_count(), friends_count) | |
def test_find_is_sensitive(self): | |
self.assertEqual(self.df.is_sensitive(), [ | |
None, None, None, None, None]) | |
def test_find_hashtags(self): | |
self.assertEqual(self.df.find_hashtags(), ['City++++', 'China++++Taiwan++++', 'XiJinping++++', 'XiJinping++++', '']) | |
def test_find_mentions(self): | |
self.assertEqual(self.df.find_mentions(), ['i_ameztoy++++', 'IndoPac_Info++++', 'ZelenskyyUa++++', '', 'ChinaUncensored++++']) | |
if __name__ == "__main__":
    # Run every TestCase in this module when the file is executed directly.
    unittest.main()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.