Last active
December 17, 2015 01:49
-
-
Save nileshgr/5530780 to your computer and use it in GitHub Desktop.
Import Twitter Archive into Tweet Nest.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
BACKUP YOUR EXISTING DB IF ANY!!! | |
Download a fresh copy of your Twitter Archive and extract it. | |
Upload data/js/tweets to your server. | |
Log in to the server and execute the following commands: | |
mv tweets tweets_src | |
mkdir tweets | |
cd tweets | |
for f in ../tweets_src/*; do tail -n +2 $f > $(basename $f); done | |
Copy this script into the current directory (tweets/, which now holds all the modified .js files) and save it as loadtweetarchive.php. | |
Edit line 4 of the script (the set_include_path() call): replace <PATH TO TWEETNEST ROOT> with the path to your Tweet Nest installation. | |
Truncate the existing tables: log in to MySQL and run: | |
truncate table tweets; truncate table tweetwords; truncate table words; | |
Run the script (inside tweets directory): | |
php loadtweetarchive.php (all js files must be present in current directory) | |
------ SCRIPT ------ | |
<?php
error_reporting(E_ALL ^ E_NOTICE); ini_set("display_errors", true); // For easy debugging, this is not a production page
@set_time_limit(0);
set_include_path(get_include_path() . PATH_SEPARATOR . "<PATH TO TWEETNEST ROOT>/inc"); // EDIT THIS PATH. Keep this call on line 4: the setup instructions refer to it by line number.

/*
 * Imports the tweets of a Twitter archive into a Tweet Nest database.
 *
 * Run from the command line inside the directory that holds the prepared
 * archive files. Every *.js file in the current directory is decoded as JSON;
 * each tweet it contains is inserted through TwitterApi::insertQuery() and
 * then handed to TweetNestSearch::index() together with its insert ID.
 *
 * The script stops with die() on the first database error. Files that cannot
 * be read or parsed are reported and skipped.
 */

include 'preheader.php';
include 'config.php';

define('DTP', $config['db']['table_prefix']);

$db         = new DB('mysql', $config['db']);
$search     = new TweetNestSearch;
$twitterApi = new TwitterApi;

// glob() returns false on error; treat that the same as "no files found".
$filenames = glob("*.js");
if (!is_array($filenames)) {
    $filenames = array();
}

foreach ($filenames as $filename) {
    print "Processing $filename\n";

    $raw = file_get_contents($filename);
    if ($raw === false) {
        echo "Could not read $filename, skipping.\n";
        continue;
    }

    // json_decode() yields null on malformed input. Report that explicitly
    // instead of letting it pass as an empty tweet list.
    $tweets = json_decode($raw, true);
    if (!is_array($tweets)) {
        echo "Could not parse $filename as JSON (json_last_error() = " . json_last_error() . "), skipping.\n";
        continue;
    }

    if (count($tweets) === 0) {
        echo "Nothing to insert.\n";
        continue;
    }

    // Ascending sort, oldest first
    $tweets = array_reverse($tweets);

    echo "All tweets collected. Reconnecting to DB...\n";
    $db->reconnect(); // Sometimes, DB connection times out during tweet loading. This is our counter-action

    echo "Inserting into DB...\n";
    foreach ($tweets as $tweet) {
        // Add the derived fields alongside the raw archive fields before
        // building the insert query.
        $tweet['time']    = strtotime($tweet['created_at']);
        $tweet['tweetid'] = $tweet['id_str'];
        $tweet['userid']  = $tweet['user']['id_str'];

        $q = $db->query($twitterApi->insertQuery($tweet));
        if (!$q) {
            die("DATABASE ERROR: " . $db->error());
        }

        $text = $tweet['text'];

        // 'extra' may be absent; when present it is either an array or a
        // serialized string.
        $te = isset($tweet['extra']) ? $tweet['extra'] : null;
        if (is_string($te)) {
            // NOTE(review): unserialize() on file-supplied data is unsafe if
            // the archive files are not trusted. Left in place to preserve
            // existing behavior.
            $te = @unserialize($te);
        }

        if (is_array($te)) {
            // Because retweets might get cut off otherwise
            $hasRetweet = array_key_exists("rt", $te)
                && !empty($te['rt'])
                && !empty($te['rt']['screenname'])
                && !empty($te['rt']['text']);

            if ($hasRetweet) {
                $text = "RT @" . $te['rt']['screenname'] . ": " . $te['rt']['text'];
            }
        }

        $search->index($db->insertID(), $text);
    }

    echo "Done!\n";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment