Skip to content

Instantly share code, notes, and snippets.

@nelero
Forked from alper/import-tweets.txt
Last active December 16, 2015 01:49
Show Gist options
  • Save nelero/5357713 to your computer and use it in GitHub Desktop.
Save nelero/5357713 to your computer and use it in GitHub Desktop.

Architecture

The archive downloaded from Twitter contains a folder named "data/js/tweets". In this folder, tweets are grouped by year and month, one .js file per month (e.g. 2010-01.js for tweets published in January 2010).

Usage

Put all these files in a "tweets" folder in the root of ThinkUp. Put the importoldtwitter.php file in the root of ThinkUp as well. Run the importoldtwitter.php script from the command line.

The script will loop on each file in tweets folder, read and import it, then remove it.

Precision

To run this script in a browser, comment out lines 11 to 13 inclusive (the check that makes the script die when $_SERVER['SERVER_NAME'] is set).
<?php
/**
 * Imports tweets from a downloaded Twitter archive into ThinkUp.
 *
 * Every "*.js" file found in ./tweets/ is expected to be a monthly archive
 * file of the form
 *
 *     Grailbird.data.tweets_YYYY_MM =
 *     [ { ...tweet... }, ... ]
 *
 * Each tweet is mapped to the array shape passed to PostDAO::addPost() and
 * inserted. The key returned by addPost() is echoed for every tweet.
 *
 * Run from the ThinkUp root directory on the command line.
 */
error_reporting(E_ALL);
require_once 'init.php';
require_once 'config.inc.php';

// Avoid "Error: DateTime::__construct(): It is not safe to rely on the system's timezone settings" error
date_default_timezone_set($THINKUP_CFG['timezone']);

// Don't run via the web. To run this script from a browser, comment out this check.
if (isset($_SERVER['SERVER_NAME'])) {
    die("This script should only be run via the command line.");
}

/**
 * Reads one archive file and returns the decoded list of tweets.
 *
 * The JavaScript assignment header ("Grailbird.data.tweets_YYYY_MM =") is
 * skipped by locating the first "[" instead of assuming a fixed byte length,
 * so files with a differently sized header still decode.
 *
 * @param string $file Path of the archive .js file.
 * @return array|null Decoded tweets (stdClass objects), or null when the file
 *                    cannot be read or does not contain a JSON array.
 */
function importoldtwitter_read_archive($file) {
    $raw = file_get_contents($file);
    if ($raw === false) {
        return null;
    }
    $json_start = strpos($raw, '[');
    if ($json_start === false) {
        return null;
    }
    $tweets = json_decode(substr($raw, $json_start));
    return is_array($tweets) ? $tweets : null;
}

/**
 * Returns an id as a string, preferring the "<name>_str" twin of the field.
 *
 * Tweet ids do not fit in a 32-bit integer; json_decode() turns such numbers
 * into floats on 32-bit builds, which corrupts them when cast to string.
 *
 * @param object $object Decoded JSON object (tweet or user).
 * @param string $name   Base field name, e.g. "id" or "in_reply_to_status_id".
 * @return string The id, or '' when neither field is present.
 */
function importoldtwitter_id($object, $name) {
    $str_name = $name . '_str';
    if (isset($object->$str_name)) {
        return (string)$object->$str_name;
    }
    return isset($object->$name) ? (string)$object->$name : '';
}

/**
 * Maps one decoded archive tweet to the array handed to PostDAO::addPost().
 *
 * Optional fields that are missing from the archive entry become ''.
 *
 * @param object $tweet Decoded tweet; must carry a "user" object.
 * @return array Post values keyed by the names addPost() is called with.
 */
function importoldtwitter_build_post($tweet) {
    $user = $tweet->user;
    $user_id = importoldtwitter_id($user, 'id');
    $has_place = isset($tweet->place) && isset($tweet->place->full_name);

    return array(
        'post_id' => importoldtwitter_id($tweet, 'id'),
        'author_username' => $user->screen_name,
        'author_fullname' => $user->name,
        'author_avatar' => $user->profile_image_url_https,
        'is_protected' => $user->protected,
        'author_user_id' => $user_id,
        'user_id' => $user_id,
        'post_text' => (string)$tweet->text,
        'pub_date' => gmdate("Y-m-d H:i:s", strtotime($tweet->created_at)),
        'in_reply_to_post_id' => importoldtwitter_id($tweet, 'in_reply_to_status_id'),
        'in_reply_to_user_id' => importoldtwitter_id($tweet, 'in_reply_to_user_id'),
        'source' => isset($tweet->source) ? (string)$tweet->source : '',
        'favorited' => isset($tweet->favorited) ? (string)$tweet->favorited : '',
        'place' => $has_place ? (string)$tweet->place->full_name : '',
        'network' => 'twitter'
    );
}

try {
    // Kept from the original script; the results are not used by the import.
    // NOTE(review): presumably these fail early when the installation is broken - confirm.
    $db_version = UpgradeDatabaseController::getCurrentDBVersion($cached = false);
    $config = Config::getInstance();
    $thinkup_db_version = $config->getValue('THINKUP_VERSION');

    $post_dao = DAOFactory::getDAO('PostDAO');
    $directory = "./tweets/";
    $files = glob($directory . "*.js");
    if ($files === false) {
        // glob() returns false on error; treat it as "nothing to import".
        $files = array();
    }

    foreach ($files as $file) {
        $tweets = importoldtwitter_read_archive($file);
        if ($tweets === null) {
            error_log(" Error: could not read or decode " . $file . ", skipping\n");
            continue;
        }
        foreach ($tweets as $tweet) {
            if (!is_object($tweet) || !isset($tweet->user)) {
                error_log(" Error: malformed tweet entry in " . $file . ", skipping\n");
                continue;
            }
            $inserted_post_key = $post_dao->addPost(importoldtwitter_build_post($tweet));
            echo 'key ' . $inserted_post_key . ' ';
        }
    }
    exit;
} catch(Exception $e) {
    error_log(" Error: " . $e->getMessage() . "\n");
    // Report the failure to the calling shell instead of exiting with status 0.
    exit(1);
}
Grailbird.data.tweets_2010_01 =
[ {
"source" : "web",
"entities" : {
"user_mentions" : [ {
"name" : "Elannu Azcrea",
"screen_name" : "elannu",
"indices" : [ 3, 10 ],
"id_str" : "20392478",
"id" : 20392478
} ],
"media" : [ ],
"hashtags" : [ {
"text" : "concoursrt",
"indices" : [ 23, 34 ]
} ],
"urls" : [ ]
},
"geo" : {
},
"id_str" : "7625102514",
"text" : "RT @elannu: Attention: #concoursrt ! A gagner des emplacements pub sur http://www.el-annuaire.com en RT ce message http://bit.ly/8aZV6",
"retweeted_status" : {
"source" : "web",
"entities" : {
"user_mentions" : [ ],
"media" : [ ],
"hashtags" : [ {
"text" : "concoursrt",
"indices" : [ 11, 22 ]
} ],
"urls" : [ ]
},
"geo" : {
},
"id_str" : "7625089486",
"text" : "Attention: #concoursrt ! A gagner des emplacements pub sur http://www.el-annuaire.com en RT ce message http://bit.ly/8aZV6",
"id" : 7625089486,
"created_at" : "Mon Jan 11 10:06:02 +0000 2010",
"user" : {
"name" : "Elannu Azcrea",
"screen_name" : "elannu",
"protected" : false,
"id_str" : "20392478",
"profile_image_url_https" : "https://si0.twimg.com/profile_images/3170939118/9b5d219d3ef6081378cee17b9a5ecfd5_normal.jpeg",
"id" : 20392478,
"verified" : false
}
},
"id" : 7625102514,
"created_at" : "Mon Jan 11 10:06:51 +0000 2010",
"user" : {
"name" : "Aur\u00E9lien",
"screen_name" : "aurel_p",
"protected" : false,
"id_str" : "14321671",
"profile_image_url_https" : "https://si0.twimg.com/profile_images/2524147488/l4btm1pg9q9ev0tfmuxp_normal.jpeg",
"id" : 14321671,
"verified" : false
}
} ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment