スクレイピング、まだBeautifulSoupで頑張っていますか。
もちろん、今でもHTMLを直接取りに行くべきケースはあります。大量の定型ページを安定して処理する、対象サイトの許可がある、DOM構造が読める、抽出項目が固定されている。そういう場合は、普通にスクレイパーを書いた方が安いし速いです。
でも最近、実務で欲しいものは単なるHTMLではなくなってきました。
欲しいのは、だいたいこういうものです。
name: gcp-rag-agent-architect-2026
description: Use when an AI agent must design, implement, benchmark, or review a Google Cloud RAG system, including parsing, chunking, embeddings, Cloud SQL/AlloyDB/Agent Search/RAG Engine/Vector Search choices, reranking, optional external-web grounding, cost, migration, and evaluation.
version: 1.0.0
author: Hermes Agent
license: MIT
metadata:
  hermes:
    tags: [gcp, rag, embeddings, vector-search, evaluation, agentic-rag]
    related_skills: [rag-application-prototyping, research-verification-workflows, product-data-systems]
| echo 'Hello World!' | |
| # aptリポジトリの更新とソフトウェア更新 | |
| sudo apt update && sudo apt upgrade -y && sudo apt autoremove -y | |
| # Python実行環境の構築 | |
| curl -sSf https://rye-up.com/get | RYE_INSTALL_OPTION="--yes" bash | |
| source "$HOME/.rye/env" | |
| echo 'Python実行環境がインストールされました。' |
| // Twitterのミュート設定ページに移動 | |
| location.assign("https://twitter.com/settings/muted_keywords"); | |
| // 要素の値を設定する簡略化された関数 | |
| function setNativeValue(element, value) { | |
| const { set: valueSetter } = Object.getOwnPropertyDescriptor(element, 'value') || {}; | |
| valueSetter?.call(element, value); | |
| } | |
| // 指定された秒数だけ遅延する関数 |
| [ | |
| "ラーメン", | |
| "NekonekoServer" | |
| ] |
| from playwright.sync_api import Playwright, sync_playwright, expect | |
| import time | |
| import re | |
| def login(playwright: Playwright) -> None: | |
| button_click_count = 0 | |
| try: | |
| browser = playwright.firefox.launch(headless=False) | |
| context = browser.new_context() | |
| page = context.new_page() |
| const SECRET_KEY = "input OpenAI token"; | |
| //const MAX_TOKENS = 10; | |
| const MODEL_NAME = "text-davinci-003"; // more structured and deterministic: for data | |
| //const MODEL_NAME = "davinci"; // more flexible and creative: for stories, chatbots | |
| const MODEL_TEMP = 0.3; | |
| function GPT(prompt, max_tokens = 200) { | |
| const url = "https://api.openai.com/v1/completions"; | |
| const payload = { | |
| model: MODEL_NAME, |
| import tweepy | |
| import requests | |
| import time | |
| import openai | |
| from datetime import date | |
| import schedule | |
| import replicate | |
| import os | |
| from PIL import Image | |
| import random |
| import SpeechRecognition, { | |
| useSpeechRecognition, | |
| } from "react-speech-recognition"; | |
| const { transcript, listening, resetTranscript } = useSpeechRecognition(); | |
| function mic() { | |
| if (listening) { | |
| SpeechRecognition.stopListening(); | |
| setMsg(transcript); |
/**
 * Reads `text` aloud through the browser's Web Speech API.
 *
 * Wraps the text in a `SpeechSynthesisUtterance` and hands it to the global
 * `speechSynthesis` object.
 *
 * @param text - The phrase to speak; a primitive `string`, as the
 *   utterance constructor expects.
 */
let speak = (text: string): void => speechSynthesis.speak(new SpeechSynthesisUtterance(text));