Last active
August 29, 2015 13:56
-
-
Save bpteam/8e7e5fa3dc02d2e99fcf to your computer and use it in GitHub Desktop.
Подписывается на RSS-ленту сайта Free-lance.ru и проверяет заголовок и описание каждого проекта на наличие ключевых слов.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Projects from the fl.ru feed that matched a keyword and were already reported.
-- The notifier script looks rows up by `id` to avoid reporting a project twice.
CREATE TABLE `flboost` (
  `id` int(11) NOT NULL,                 -- numeric project id taken from the project URL
  `title` text,                          -- item <title> from the feed
  `description` text,                    -- item <description> from the feed
  `date` int(11) DEFAULT NULL,           -- item pubDate stored as a Unix timestamp
  `category` varchar(255) DEFAULT NULL,  -- feed categories joined with '|'
  `url` text,                            -- item <link> from the feed
  -- The primary key already creates a unique index on `id`;
  -- a second UNIQUE KEY on the same column would only duplicate it.
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Created by PhpStorm.
 * User: EC
 * Date: 19.11.13
 * Time: 22:01
 * Project: free-notify
 * @author: Evgeny Pynykh bpteam22@gmail.com
 *
 * Downloads the fl.ru RSS feed, extracts every <item>, and for each project
 * whose title or description contains one of the keywords (and which is not
 * stored in `flboost` yet) inserts a row and sends a notification e-mail.
 *
 * NOTE(review): $mysqli is not defined in this file; it is expected to come
 * from one of the included files (presumably cfg.php) - confirm.
 */
ini_set('display_errors',1);
require_once dirname(__FILE__).'/../_coolLib/loader/include.php';
require_once dirname(__FILE__).'/cfg.php';

$url = 'https://www.fl.ru/rss/all.xml?subcategory=37&category=5';
$gc = new \GetContent\cSingleCurl();
//$gc->setEncodingAnswer(false);
$content = $gc->getContent($url);

// 'row' is initialised up front so the processing loop below is safe
// even when the feed contains no parsable items.
$feed = array('row' => array());

// Feed-level category / subcategory taken from the channel <title>.
if(preg_match('%\(Все проекты:\s(?<category>((?!\s-\s).)*)((?:\s-\s)(?<subcategory>[^<]+))?\)</title>%ims', $content, $match)){
    $feed['category'] = $match['category'];
    if(isset($match['subcategory'])){
        $feed['subcategory'] = $match['subcategory'];
    }
}

// Pull every <item> out of the feed; the U modifier makes all quantifiers lazy
// so each capture stops at the first closing tag.
if(preg_match_all('%<item>\s*<title><\!\[CDATA\[(?<title>.*)\]\]></title>\s*<link>(?<link>.*)</link>\s*<description><\!\[CDATA\[(?<description>.*)\]\]></description>\s*<guid>(?<guid>.*)</guid>\s*<category>(?<category>.*)</category>\s*<pubDate>(?<pubdate>.*)</pubDate>\s*</item>%imsU',$content,$matches)){
    foreach(array_keys($matches['title']) as $keyRow){
        $feed['row'][] = array(
            'title'       => $matches['title'][$keyRow],
            'link'        => $matches['link'][$keyRow],
            'description' => $matches['description'][$keyRow],
            'guid'        => $matches['guid'][$keyRow],
            // The feed separates multiple categories with <br/>.
            'category'    => explode('<br/>', $matches['category'][$keyRow]),
            'pubdate'     => strtotime($matches['pubdate'][$keyRow]),
        );
    }
}

// Keywords that mark a project as interesting (case-insensitive, UTF-8 aware).
$reg = '%(парсер|собрать|парс|parser|pars|сбор|парсинг|скачать|загрузить|наполнить|слить)%imsu';

foreach($feed['row'] as $project){
    // The project id is the numeric URL segment. Skip items whose link does not
    // contain one instead of building a query with a missing or stale id.
    if(!preg_match('%https://www\.fl\.ru/projects/(?<id>\d+)/%', $project['link'], $match)){
        continue;
    }
    $project['id'] = $match['id']; // digits only, guaranteed by \d+

    $hasKeyword = preg_match($reg,$project['title']) || preg_match($reg,$project['description']);
    if($hasKeyword && isUnique($project)){
        // Every value originating from the remote feed is escaped before being
        // placed into the statement; pubdate is forced to an integer because
        // strtotime() returns false for unparsable dates.
        $query = sprintf(
            "INSERT INTO `flboost` (`id`, `title`, `description`, `date`, `category`, `url`)
VALUES (%s, '%s', '%s', '%d', '%s', '%s')",
            $project['id'],
            $mysqli->escape_string($project['title']),
            $mysqli->escape_string($project['description']),
            (int)$project['pubdate'],
            $mysqli->escape_string(implode("|",$project['category'])),
            $mysqli->escape_string($project['link'])
        );
        $mysqli->query($query);
        echo 'SEND!';
        mail('zking.nothingz@gmail.com','ALERT ' . $project['title'], $project['title'] . "\n" . date('c',$project['pubdate']) . "\n" . $project['link'] . "\n" . $project['description']);
    }
}
echo 'done!';
/**
 * Tells whether a project is not stored in the `flboost` table yet.
 *
 * Uses the global $mysqli connection object.
 *
 * @param array $data Project data; the 'id' element is the project id.
 * @return bool true when no row with that id exists; false when a row exists
 *              or when the lookup query itself failed.
 */
function isUnique($data){
    global $mysqli;
    // Force the id to an integer so nothing but a number reaches the SQL text.
    $id = isset($data['id']) ? (int)$data['id'] : 0;
    $result = $mysqli->query("SELECT url FROM `flboost` WHERE `id` = {$id}");
    if(!$result){
        // A failed lookup cannot prove the project is new; reporting it as
        // "not unique" avoids re-sending notifications while the DB is failing.
        return false;
    }
    return !$result->num_rows;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment