This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
444 | |
Magyar Narancs | |
Mandiner | |
888 | |
Alfahír | |
Azonnali | |
Mérce | |
Válasz Online | |
Híradó | |
Pesti Srácok |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function lists(siteIds, categoryIds, until, limit, isExtracted = true) { | |
return Article.findAndCountAll({ | |
limit, | |
where: { | |
is_content_extracted: isExtracted, | |
published_at: { | |
[Op.lt]: until, | |
}, | |
}, | |
order: [["published_at", "DESC"]], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT "Article"."id", "Article"."title", "Article"."url", "Article"."description", "Article"."published_at", "Article"."estimated_read_time", "Article"."image", "Category"."id" AS "Category.id", "Category"."name" AS "Category.name", "Category"."alias" AS "Category.alias", "Category"."created_at" AS "Category.createdAt", "Category"."updated_at" AS "Category.updatedAt", "Site"."id" AS "Site.id", "Site"."name" AS "Site.name", "Site"."slug" AS "Site.slug", "Site"."url" AS "Site.url", "Site"."state" AS "Site.state", "Site"."feed" AS "Site.feed", "Site"."scraping_method" AS "Site.scraping_method", "Site"."created_at" AS "Site.createdAt", "Site"."updated_at" AS "Site.updatedAt" | |
FROM "articles" AS "Article" | |
INNER JOIN "categories" AS "Category" ON "Article"."category_id" = "Category"."id" | |
AND "Category"."id" IN ('1', '2', '3', '4', '5', '6', '7') | |
INNER JOIN "sites" AS "Site" ON "Article"."site_id" = "Site"."id" | |
AND "Site"."id" IN ('27', '31', '2', '7', '9', '19', '10', '20', '32', '29', '13', '23', '25', '18', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const { Article, Category, Site, Sequelize } = require("db"); | |
const Classifier = require("classifier")(); | |
const Scraper = require("scraper")(); | |
const { waitASec, waitAMinute } = require("../utils"); | |
const { Op } = Sequelize; | |
let articles = []; | |
function cleanObject(object) { | |
if (typeof object !== "object") { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const MetaExtractor = require("./extractors/meta"); | |
const SelectorExtractor = require("./extractors/selectors"); | |
const ReadabilityExtractor = require("./extractors/readability"); | |
const PuppeteerExtractor = require("./extractors/puppeteer"); | |
module.exports = function (HTTPClient, HTMLParser) { | |
return { | |
initPuppeteer() { | |
return PuppeteerExtractor.start(); | |
}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
https://index.hu/kultur/fabryonline/ | |
https://index.hu/tech/net/pontcom/ | |
https://index.hu/belfold/viszkis1/ | |
https://index.hu/belfold/viszkis3/ | |
https://index.hu/belfold/viszkis2/ | |
https://index.hu/belfold/magyaronline/ | |
https://index.hu/belfold/viszkis4/ | |
https://index.hu/belfold/viszkis5/ | |
https://index.hu/bulvar/lada2/ | |
https://index.hu/bulvar/lada/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fetch = require("node-fetch"); | |
const fs = require("fs"); | |
const cheerio = require("cheerio"); | |
const { addDays, format } = require("date-fns"); | |
const { promisify } = require("util"); | |
const readFile = promisify(fs.readFile); | |
const writeFile = promisify(fs.writeFile); | |
const appendFile = promisify(fs.appendFile); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CFLAGS="-Wno-narrowing" pip3 install cld2-cffi | |
# pip3 install multi_rake | |
from multi_rake import Rake | |
text_en = ( | |
'Az eddig klímahisztivel riogató magyar kormány néhány nap leforgása alatt a klímavédelem élharcosa lett. Meg is hirdették a Klímavédelmi akciótervet, amelynek egyik sarkalatos pontja, hogy 2022-től a hazai városi közlekedési cégek kizárólag elektromos autóbuszokat vásárolhatnak. Ezt a beszerzést egyrészt az EIB gigahiteléből, másrészt pedig a szén-dioxid- kvóta eladásából oldaná meg az ország vezetése. De vajon jól gondolkodik a kormány, tényleg az elektromos buszokkal fogjuk megmenteni a bolygót? Vagy képviselőink megint fordítva ültek fel a lóra, és fogalmuk sincs arról, hogy mennyi problémát zúdítanak ezzel az üzemeltetők és az utasok nyakába? A magyar kormány prominensei néhány héttel ezelőtt még klímahisztiről beszéltek, ha szóba jött, hogy mit tesz a kormány a környezet megvédése érdekben. A korábbi retorikát Orbán Viktor a január eleji kormányinfón sutba dobta, és meghirdette a klímav |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* article_selector migration */ | |
"use strict"; | |
module.exports = { | |
up: (queryInterface, Sequelize) => { | |
return queryInterface.createTable("ArticleSelectors", { | |
id: { | |
allowNull: false, | |
autoIncrement: true, | |
primaryKey: true, | |
type: Sequelize.INTEGER, |
This file has been truncated, but you can view the full file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html> | |
<head> | |
<meta charset="UTF-8"/> | |
<title>Kepler.gl embedded map</title> | |
<!--Uber Font--> | |
<link rel="stylesheet" href="https://d1a3f4spazzrp4.cloudfront.net/kepler.gl/uber-fonts/4.0.0/superfine.css"> |
NewerOlder