Skip to content

Instantly share code, notes, and snippets.

@Lucent
Created August 19, 2018 04:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Lucent/cbdbaff5e6b8c41c813acff8ea758cf5 to your computer and use it in GitHub Desktop.
Save Lucent/cbdbaff5e6b8c41c813acff8ea758cf5 to your computer and use it in GitHub Desktop.
Search and cleanup LiveJournal entries
<?php
// Access gate: the request must carry ?password=password. Anything else gets
// the notice below and the script stops before touching the backup files.
// A missing parameter is treated as a wrong password instead of raising an
// undefined-index notice, and the comparison is strict.
$supplied_password = isset($_GET["password"]) ? $_GET["password"] : null;
if ($supplied_password !== "password") { ?>
<p>Private search feature. Intrusion logged.</p>
<?php
    return;
}

// Loading and scanning every backup file can be slow; allow up to 5 minutes.
set_time_limit(300);

// Directory holding the monthly YYYY-MM.xml backup files. The string is
// double-quoted, but \d, \m and \l are not PHP escape sequences, so the
// backslashes stay literal.
$journal_dir = "C:\documents and settings\michael dayah\my documents\lj backup";
// Journal name used when building links to livejournal.com.
$username = "lucent";
// Flat list of every entry loaded from the backup files.
$entry_array = array ();
// Only referenced by the graphing code that is currently commented out.
$databarx = array ();
$databary = array ();
/**
 * Lists the monthly backup files found directly inside a directory.
 *
 * Only names of the form YYYY-MM.xml (matched case-insensitively) are kept.
 *
 * @param string $sPath Directory to scan.
 * @return array Matching file names (without the directory), sorted
 *               ascending; empty when the directory cannot be opened.
 */
function GetDirArray($sPath) {
    $retVal = array ();
    $handle = opendir($sPath);
    if ($handle === false) {
        // opendir() has already raised its own warning; report "no files".
        return $retVal;
    }
    // Compare against false explicitly: a directory entry named "0" is falsy
    // and would otherwise end the loop before all entries were seen.
    while (($file = readdir($handle)) !== false) {
        if (preg_match("/^\d{4}-\d{2}\.xml$/i", $file)) {
            $retVal[] = $file;
        }
    }
    closedir($handle);
    sort($retVal);
    return $retVal;
}
// Names of the monthly backup files found in $journal_dir.
$journal_file_array = GetDirArray($journal_dir);
/**
 * Folds part of a flat element list (the structure xml_parse_into_struct()
 * fills in) into a nested array.
 *
 * Walking starts at the element AFTER position $i and stops at the first
 * "close" element on the current level, or at the end of the list.
 *   - "complete" elements that carry a value are stored as tag => value;
 *     complete elements without a value are dropped.
 *   - "open" elements recurse, and the resulting child array is appended
 *     under the next numeric key.
 *   - any other element type is ignored.
 *
 * @param array $vals Flat element list; each element has "type" and "tag"
 *                    keys and optionally "value".
 * @param int   $i    Cursor into $vals, passed by reference; on return it
 *                    points at the element that ended the walk.
 * @return array The collected children. Always an array, including when the
 *               list ends without a closing element (previously nothing was
 *               returned in that case).
 */
function GetChildren($vals, &$i) {
    $children = array ();
    $total = count($vals);
    while (++$i < $total) {
        switch ($vals[$i]["type"]) {
            case "complete":
                if (array_key_exists("value", $vals[$i])) {
                    $children[$vals[$i]["tag"]] = $vals[$i]["value"];
                }
                break;
            case "open":
                $children[] = GetChildren($vals, $i);
                break;
            case "close":
                return $children;
        }
    }
    // Ran off the end of the list without meeting a "close" element.
    return $children;
}
/**
 * Parses an XML file and returns the children of its root element as a
 * nested array built by GetChildren().
 *
 * @param string $file Path of the XML file to read.
 * @return mixed Whatever GetChildren() returns for the parsed element list.
 */
function GetXMLTree($file) {
    $data = file_get_contents($file);
    if ($data === false) {
        // Unreadable file: parse an empty document rather than passing
        // false on to the parser.
        $data = "";
    }
    $p = xml_parser_create();
    xml_parser_set_option($p, XML_OPTION_SKIP_WHITE, 1);
    // xml_parse_into_struct() declares these parameters by reference, so the
    // variables are passed plainly. Call-time "&$var" is a fatal error on
    // PHP 5.4 and later.
    $vals = array ();
    $index = array ();
    xml_parse_into_struct($p, $data, $vals, $index);
    xml_parser_free($p);
    // GetChildren() pre-increments its cursor, so starting at 0 skips the
    // first element (the root's opening tag) and collects what follows.
    $i = 0;
    return GetChildren($vals, $i);
}
/**
 * Sort callback ordering two entries by the word count of their "EVENT"
 * field, as measured by fieldsize().
 *
 * Entries with the same word count compare as equal. The earlier version
 * returned 1 for any two distinct entries of equal size, whichever way round
 * they were passed, which gives a sort an inconsistent ordering.
 *
 * @param array $a First entry; must contain "EVENT".
 * @param array $b Second entry; must contain "EVENT".
 * @return int -1, 0 or 1.
 */
function cmp($a, $b) {
    $size_a = fieldsize($a["EVENT"]);
    $size_b = fieldsize($b["EVENT"]);
    if ($size_a == $size_b) {
        return 0;
    }
    return ($size_a < $size_b) ? -1 : 1;
}
// Load every monthly backup file and append its entries, in file order, to
// the flat $entry_array list.
foreach ($journal_file_array as $month) {
    $month_array = GetXMLTree($journal_dir . "\\" . $month);
    // A file that yields no parsed elements can come back as a non-array;
    // skip it rather than letting foreach emit a warning.
    if (!is_array($month_array)) {
        continue;
    }
    foreach ($month_array as $entry) {
        $entry_array[] = $entry;
    }
}
/**
 * Returns the size of one entry according to the "size" query parameter.
 *
 * The parameter selects which fields are measured with fieldsize():
 *   "subject"  -> SUBJECT
 *   "event"    -> EVENT
 *   "currents" -> CURRENT_MUSIC plus CURRENT_MOOD
 * Any other value measures nothing. Fields missing from the entry are
 * skipped.
 *
 * @param array $entry Entry fields keyed by upper-case tag name.
 * @return int Sum of fieldsize() over the selected fields that exist.
 */
function journalsize($entry) {
    $mode = $_GET["size"];

    if ($mode == "subject") {
        $fields = array("SUBJECT");
    } elseif ($mode == "event") {
        $fields = array("EVENT");
    } elseif ($mode == "currents") {
        $fields = array("CURRENT_MUSIC", "CURRENT_MOOD");
    } else {
        $fields = array();
    }

    $total = 0;
    foreach ($fields as $field) {
        if (array_key_exists($field, $entry)) {
            $total += fieldsize($entry[$field]);
        }
    }
    return $total;
}
/**
 * Counts the whitespace-separated words in a piece of text.
 *
 * Empty pieces are discarded, so an empty or all-whitespace string counts as
 * 0 and leading or trailing whitespace does not add phantom words.
 * Previously "" counted as 1 and " a b " as 4.
 *
 * @param string $text Text to measure.
 * @return int Number of non-empty tokens.
 */
function fieldsize($text) {
    return count(preg_split('/\s+/', (string) $text, -1, PREG_SPLIT_NO_EMPTY));
}
/**
 * Prints one search hit: a link to the entry on livejournal.com, followed by
 * an HTML-escaped excerpt of the entry text around each match.
 *
 * @param array $entry   Entry fields; reads "EVENTTIME", "ITEMID" and "EVENT".
 * @param array $matches Match sets in preg_match_all() PREG_SET_ORDER shape;
 *                       only element [0] (the full match) of each is used.
 * @return void Output is echoed directly.
 */
function show_results($entry, $matches) {
    global $username;

    $event = isset($entry["EVENT"]) ? $entry["EVENT"] : "";
    $stamp = strtotime($entry["EVENTTIME"]);

    // Entries stamped exactly 23:59:00 are labelled with the date only;
    // every other entry also gets its time of day.
    if (date("His", $stamp) == 235900) {
        $when = date("j M Y", $stamp);
    } else {
        $when = date("j M Y, g:i&\l\\t;\s\m\a\l\l&\g\\t;A&\l\\t;/\s\m\a\l\l&\g\\t;", $stamp);
    }
    // The trailing HTML comment on the next line is literal page output; PHP
    // does not evaluate it.
    ?>
<a href="http://www.livejournal.com/users/<?= $username ?>/<?= $entry["ITEMID"] ?>.html"><?= $when . paragraph($event) ?></a><br> <!-- fieldsize($entry["EVENT"]) -->
<?php
    $offset = 0;
    foreach ($matches as $match) {
        // An empty match has no surrounding text worth quoting.
        if ($match[0] === "") {
            continue;
        }
        $match_location = strpos($event, $match[0], $offset);
        // The match may have come from a different field than EVENT; with
        // nothing located there is no excerpt to show.
        if ($match_location === false) {
            continue;
        }
        // Start the 100-byte window 50 bytes before the match, clamped to
        // the start of the text. abs() used to mirror a negative start to a
        // point past the match, so early matches showed the wrong excerpt.
        $start = max(0, $match_location - 50);
        echo "<tt>", htmlspecialchars(substr($event, $start, 100)), "</tt><br>\n";
        // Resume just past this hit so a repeated match finds the next one.
        $offset = $match_location + 1;
    }
}
/**
 * Placeholder with no implementation: the argument is ignored and null is
 * returned, so concatenating the result contributes an empty string.
 *
 * @param string $event Entry text (unused).
 * @return null
 */
function paragraph($event) {
    return null;
}
//usort ($entry_array, "cmp");

// Field the search runs against: "only" searches subjects; "include",
// "exclude" and (now, instead of leaving $look_in undefined) any missing or
// unrecognised value search the entry text.
$subjects_mode = isset($_GET["subjects"]) ? $_GET["subjects"] : "";
$look_in = ($subjects_mode === "only") ? "SUBJECT" : "EVENT";

// Build the regular expression once; it depends only on the request, not on
// the entry being examined. A missing or unrecognised "type" leaves the
// pattern empty, which matches every entry (the same result the undefined
// variable used to produce, minus the notice).
// NOTE(review): the search text is inserted into a "/"-delimited pattern
// as-is, so a "/" in it breaks the expression — confirm whether it should be
// escaped.
$raw_search = (isset($_GET["search"]) && is_string($_GET["search"])) ? $_GET["search"] : "";
$search = "";
switch (isset($_GET["type"]) ? $_GET["type"] : "") {
    case "substring":
        if (isset($_GET["scope"]) && $_GET["scope"] == "paragraph")
            // conceptually incorrect
            $search = "<p>(.*?)" . preg_replace("/\s+/", "(.*?)", $raw_search) . "(.*?)<\/p>";
        else
            $search = preg_replace("/\s+/", "\s", $raw_search);
        break;
    case "phrase":
        // NOTE(review): stripslashes() presumably compensates for magic
        // quotes; where those are off it also removes backslashes the user
        // typed — confirm intent.
        $search = stripslashes($raw_search);
        break;
}
$case_flag = (isset($_GET["case"]) && $_GET["case"] == "insensitive") ? "i" : "";
$pattern = "/" . $search . "/" . $case_flag;

$journal_size = 0;
$movies = array();
$movielist = array();
foreach ($entry_array as $entry) {
    $journal_size += journalsize($entry);
/*
// make sure (LOCATION) end of subject
if (!(preg_match("/.* \([HWMEAD-]\)$/", $entry["SUBJECT"])))
echo nl2br("no location in subject: {$entry["EVENTTIME"]}\n");
// make sure paragraph tags are used properly
if ((preg_match_all("/<p[^>]*>/", $entry["EVENT"], $junk) != preg_match_all("/<\/p>/", $entry["EVENT"], $junk)) || (preg_match_all("/<\/p>/", $entry["EVENT"], $junk) != preg_match_all("/<p[^>]*>(.*?)<\/p>/", $entry["EVENT"], $junk)))
echo nl2br("paragraph tag mismatch: {$entry["EVENTTIME"]}\n");
if (!array_key_exists("CURRENT_MOOD", $entry))
echo nl2br("no mood: {$entry["ITEMID"]}\n");
// make sure there's no double spacing
if (strstr($entry["EVENT"], " "))
echo nl2br("double-spacing detected: {$entry["EVENTTIME"]}\n");
// check for bad chars in the music
if (array_key_exists("CURRENT_MUSIC", $entry))
for ($x = 1; $x < strlen($entry["CURRENT_MUSIC"]); $x++)
if ( (ord($entry["CURRENT_MUSIC"]{$x}) > 126) || (ord($entry["CURRENT_MUSIC"]{$x}) < 32) )
echo nl2br("bad character: {$entry["CURRENT_MUSIC"]{$x}} at " . $x / strlen($entry["CURRENT_MUSIC"]) * 100 . "% of {$entry["EVENTTIME"]}\n");
// check for bad chars in the subject
for ($x = 1; $x < strlen($entry["SUBJECT"]); $x++)
if ( (ord($entry["SUBJECT"]{$x}) > 126) || (ord($entry["SUBJECT"]{$x}) < 32) )
echo nl2br("bad character: {$entry["SUBJECT"]{$x}} at " . $x / strlen($entry["SUBJECT"]) * 100 . "% of {$entry["EVENTTIME"]}\n");
// check for bad chars in the entry
for ($x = 1; $x < strlen($entry["EVENT"]); $x++)
if ( (ord($entry["EVENT"]{$x}) > 126) || (ord($entry["EVENT"]{$x}) < 32) )
echo nl2br("bad character: {$entry["EVENT"]{$x}} at " . $x / strlen($entry["EVENT"]) * 100 . "% of {$entry["EVENTTIME"]}\n");
preg_match_all("/imdb.com\/Title\?(\d*)\"/", $entry["EVENT"], $movie, PREG_SET_ORDER);
preg_match_all("/imdb.com\/title\/tt(\d*)\"/", $entry["EVENT"], $movie, PREG_SET_ORDER);
array_push($movies, $movie);
*/
    // The chosen field can be absent from an entry; search an empty string
    // in that case instead of reading an undefined key.
    $haystack = isset($entry[$look_in]) ? $entry[$look_in] : "";
    if (preg_match_all($pattern, $haystack, $matches, PREG_SET_ORDER)) {
        // Months elapsed since July 2001; only consumed by the disabled
        // graph code below.
        $index = (date("Y", strtotime($entry["EVENTTIME"])) - 2001) * 12 + date("n", strtotime($entry["EVENTTIME"])) - 7;
        // if (array_key_exists($index, $databarx))
        // $databarx[$index] += count($matches);
        // else
        // $databarx[$index] = count($matches);
        show_results ($entry, $matches);
    }
}

// see if any imdb links are duplicated
// $movies is only filled by the disabled checks above, so as shipped these
// loops have nothing to iterate. Each movie id maps to a comma-separated
// list of the entry indexes that link to it.
foreach ($movies as $day_number => $day) {
    foreach ($day as $movie) {
        if (array_key_exists($movie[1], $movielist))
            $movielist[$movie[1]] = $movielist[$movie[1]] . "," . $day_number;
        else
            $movielist[$movie[1]] = (string) $day_number;
    }
}
// Keep only ids linked from more than one entry. unset() keeps the remaining
// keys intact; array_splice() renumbered integer keys, which replaced purely
// numeric movie ids with 0, 1, 2, ...
foreach ($movielist as $key => $movie) {
    if (strpos($movie, ",") === false)
        unset($movielist[$key]);
}
foreach ($movielist as $key => $day) {
    $today = explode(",", $day);
    echo "dup imdb link: <a href=\"http://us.imdb.com/Title?" . $key . "\">movie</a> on ";
    foreach ($today as $oneday)
        echo "<a href=\"http://www.livejournal.com/users/" . $username . "/" . $entry_array[$oneday]["ITEMID"] . ".html\">" . $entry_array[$oneday]["EVENTTIME"] . "</a><b> || </b>";
    echo nl2br("\n");
}

// Total size of all entries, per the "size" query parameter.
echo $journal_size;
/*
include ("jpgraph\jpgraph.php");
include ("jpgraph\jpgraph_line.php");
include ("jpgraph\jpgraph_bar.php");
include ("jpgraph\jpgraph_scatter.php");
$graph = new Graph(800, 600, "auto", -1);
$graph->SetScale("linlin");
$b1 = new ScatterPlot(array_values($databarx), array_keys($databarx));
$b1->mark->SetType(MARK_FILLEDCIRCLE);
$b1->mark->SetFillColor("red");
$b1->SetLinkPoints();
// $b1->SetLinkPoints();
$graph->Add($b1);
$graph->Stroke();
*/
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment