This crawler gets all posts of a given Facebook group plus all events from a set of given Facebook pages.
Things to do:
- crawl multiple Facebook groups
# Web-scraping sample (using Rurema Search, the Japanese Ruby reference, as the example)
require 'open-uri'  # library for downloading pages
require 'nokogiri'  # library for parsing HTML
require 'uri'       # for safely escaping the query component
Encoding.default_external = "UTF-8" # use UTF-8 as the default external encoding
keyword = "include" # search keyword
# Build the search URL; escape the keyword so special characters cannot break the URL
url = "https://docs.ruby-lang.org/ja/search/query:#{URI.encode_www_form_component(keyword)}/"
# Download step follows (charset receives the target site's encoding)
// Plan for this script:
// - Create Docker image
// - Run Docker container
// - Grab tweets from Twitter
// - Save results to Firebase
// - Send the link
// - Shut everything down
const TwitterCrawler = require('twitter-crawler'); // third-party Twitter crawling client
const fs = require('fs');                          // filesystem access for saving results
const log = require('winston');                    // logging library
// Crawl a Facebook Graph API feed URL with simplecrawler.
var Crawler = require("simplecrawler");
var Url = require("url");
// SECURITY NOTE(review): an app access token is hard-coded in this URL.
// Move it to configuration/an environment variable; a committed token
// must be considered leaked and should be rotated.
var target = "https://graph.facebook.com/ledzeppelin/feed?access_token=1597581200507009%7Ce749be55ea86249f92ae56b081c37b38&fields=from%2Cmessage%2Ccreated_time%2Ctype%2Clink%2Ccomments.summary(true)%2Clikes.summary(true)%2Cshares&since=2016-07-11&until=2016-07-14&limit=10";
var url = Url.parse(target);
var crawler = new Crawler(url.host);  // crawl host: graph.facebook.com
crawler.initialPath = url.path;       // start at the feed query path (incl. query string)
crawler.initialPort = 443;            // HTTPS port
crawler.initialProtocol = "https";
#!/usr/bin/env ruby
require 'uri'
require 'nokogiri'
require 'mechanize'
require 'logger'
# On Ctrl-C, print the crawler's report (if a crawler exists yet) and exit.
# Safe navigation avoids a NoMethodError when the signal arrives before
# @crawler has been assigned.
trap('INT') { @crawler&.report; exit }
class Crawler |
// Wikipedia crawler setup: start from one topic and skip non-article links.
var cheerio = require('cheerio');      // HTML parsing
var Crawler = require('simplecrawler');
var initialTopic = 'SpaceX';
// Link prefixes and namespaces on Wikipedia that are not regular articles
// and therefore should not be crawled.
var blacklist = ["#", "/w/", "/static/", "/api/", "/beacon/", "File:",
                 "Wikipedia:", "Template:", "MediaWiki:", "Help:", "Special:",
                 "Category:", "Portal:", "Main_Page", "Talk:", "User:",
                 "User_talk:", "Template_talk:", "Module:"];
var url = '/wiki/' + initialTopic;     // initial crawl path
// NOTE(review): this snippet is an exact duplicate of the Wikipedia crawler
// setup that appears earlier in this file — consider deduplicating.
// Wikipedia crawler setup: start from one topic and skip non-article links.
var cheerio = require('cheerio');      // HTML parsing
var Crawler = require('simplecrawler');
var initialTopic = 'SpaceX';
// Link prefixes and namespaces on Wikipedia that are not regular articles
// and therefore should not be crawled.
var blacklist = ["#", "/w/", "/static/", "/api/", "/beacon/", "File:",
                 "Wikipedia:", "Template:", "MediaWiki:", "Help:", "Special:",
                 "Category:", "Portal:", "Main_Page", "Talk:", "User:",
                 "User_talk:", "Template_talk:", "Module:"];
var url = '/wiki/' + initialTopic;     // initial crawl path
#!/usr/bin/ruby
# Headless-browser scraping setup: Capybara with the Poltergeist (PhantomJS) driver.
require 'capybara'
require 'capybara/dsl'
require 'capybara/poltergeist'
require 'nokogiri'
require 'open-uri'
Capybara.configure do |config| |
This crawler gets all posts of a given Facebook group plus all events from a set of given Facebook pages.
Things to do:
<?php
// Use the full `<?php` open tag: the short `<?` form only works when
// short_open_tag is enabled and is not portable.
/////////////////////
// slack2html
// by @levelsio
/////////////////////
//
/////////////////////
// WHAT DOES THIS DO?
/////////////////////
//
# Check each word/phrase listed in valid_words.txt against the chapters/ tree.
# Requires the silver searcher - https://github.com/ggreer/the_silver_searcher
# The `|| [ -n "$line" ]` guard also processes a final line that lacks a
# trailing newline, so the file no longer needs an empty line at the end.
while IFS= read -r line || [ -n "$line" ]; do
  # Skip blank lines so stray empty lines in the word list are harmless.
  [ -n "$line" ] || continue
  echo '-------------------------'
  echo "Checking for word(s): $line"
  ag -i "$line" chapters/   # case-insensitive search for the term
done < valid_words.txt