Skip to content

Instantly share code, notes, and snippets.

@yashihei
Last active December 10, 2023 04:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yashihei/8fdf1895620d391600b5fded59a62189 to your computer and use it in GitHub Desktop.
Save yashihei/8fdf1895620d391600b5fded59a62189 to your computer and use it in GitHub Desktop.
hatebu_to_scrapbox.go(ほぼChatGPTに書いてもらった)
package main
import (
"encoding/json"
"fmt"
"net/http"
"os"
"strconv"
"time"
"golang.org/x/net/html"
"golang.org/x/net/html/charset"
"golang.org/x/text/transform"
)
// ScrapboxPage is a single page of the export: its title plus the text
// lines that make up the page body. It is serialized with the JSON keys
// "title" and "lines".
type ScrapboxPage struct {
	Title string   `json:"title"`
	Lines []string `json:"lines"`
}
// ScrapboxExport is the top-level document written to the export file. It
// wraps the collected pages under the JSON key "pages".
type ScrapboxExport struct {
	Pages []ScrapboxPage `json:"pages"`
}
// OGPInfo carries the Open Graph values that fetchOGPInfo reads from a
// page's <meta> tags: og:title, og:description and og:image. A field is
// left empty when the corresponding tag was not seen.
type OGPInfo struct {
	Title       string
	Description string
	Image       string
}
// ogpHTTPClient is shared by every fetchOGPInfo call. The timeout bounds the
// whole request (connect, redirects and reading the body) so that a single
// unresponsive site cannot stall the export indefinitely.
var ogpHTTPClient = &http.Client{Timeout: 15 * time.Second}

// fetchOGPInfo downloads the page at url and collects its Open Graph
// metadata (og:title, og:description, og:image) from <meta> tags.
//
// It returns an error when the request fails, when the server answers with a
// non-2xx status, or when charset.NewReader rejects the response. Once
// tokenizing has started the scan is best-effort: whatever was collected
// before the tokenizer stopped is returned with a nil error.
func fetchOGPInfo(url string) (*OGPInfo, error) {
	resp, err := ogpHTTPClient.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Do not scrape error pages (404, 500, ...) as if they were the bookmark.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return nil, fmt.Errorf("fetching %q: unexpected status %s", url, resp.Status)
	}

	// Pick the character encoding from the Content-Type header and convert
	// the body to UTF-8.
	utf8Reader, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
	if err != nil {
		return nil, err
	}

	// NOTE(review): transform.Chain() is called with no transformers, so this
	// wrapper presumably passes bytes through unchanged. It is kept so the
	// file's transform import stays in use.
	tokenizer := html.NewTokenizer(transform.NewReader(utf8Reader, transform.Chain()))

	ogpInfo := &OGPInfo{}
	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			// The tokenizer stopped (end of input or a read error); return
			// what has been gathered so far.
			return ogpInfo, nil
		case html.StartTagToken, html.SelfClosingTagToken:
			token := tokenizer.Token()
			if token.Data != "meta" {
				continue
			}

			var property, content string
			for _, attr := range token.Attr {
				switch attr.Key {
				case "property":
					property = attr.Val
				case "content":
					content = attr.Val
				}
			}

			// A later tag with the same property overwrites an earlier one.
			switch property {
			case "og:title":
				ogpInfo.Title = content
			case "og:description":
				ogpInfo.Description = content
			case "og:image":
				ogpInfo.Image = content
			}
		}
	}
}
// parseTokenAttributes walks the attributes of token and returns, in order,
// the href value, a title, and the add_date value formatted as YYYY-MM-DD.
//
// The title is never read from an attribute, so the second result is always
// the empty string. add_date is parsed as a base-10 Unix timestamp in
// seconds; when parsing fails a message is printed and the date stays
// unset.
func parseTokenAttributes(token html.Token) (string, string, string) {
	var href, bookmarkedOn string

	for _, a := range token.Attr {
		switch a.Key {
		case "href":
			href = a.Val
		case "add_date":
			secs, err := strconv.ParseInt(a.Val, 10, 64)
			if err != nil {
				// href is only known here if it appeared before add_date.
				fmt.Println("Error parsing ADD_DATE for", href, ":", err)
				continue
			}
			bookmarkedOn = time.Unix(secs, 0).Format("2006-01-02")
		}
	}

	return href, "", bookmarkedOn
}
// main converts the bookmark export yashihei.bookmarks.sample.html into a
// Scrapbox import file, export_to_scrapbox.json.
//
// Every <a> element becomes one page. The page lines are the title, a
// Scrapbox link "[url title]", an optional "bookmarked: YYYY-MM-DD" line,
// and, when the OGP lookup succeeds, the og:image and og:description of the
// linked page. A failed OGP lookup is reported and the page is still
// exported without those lines.
//
// The program panics if the input cannot be opened, the export cannot be
// encoded, or the output file cannot be written.
func main() {
	fileName := "yashihei.bookmarks.sample.html"
	file, err := os.Open(fileName)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	tokenizer := html.NewTokenizer(file)

	// Start from an empty, non-nil slice so that an input without bookmarks
	// is encoded as "pages": [] instead of "pages": null.
	export := ScrapboxExport{Pages: []ScrapboxPage{}}

scan:
	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			// The tokenizer stopped (end of input or a read error); write
			// out whatever has been collected.
			break scan
		case html.StartTagToken, html.SelfClosingTagToken:
			token := tokenizer.Token()
			if token.Data != "a" {
				continue
			}

			url, title, addDate := parseTokenAttributes(token)

			// The link text, when present, is the token right after the
			// opening tag. A non-text token here is consumed and dropped.
			if tokenizer.Next() == html.TextToken {
				title = tokenizer.Token().Data
			}

			lines := []string{title, "[" + url + " " + title + "]"}
			if addDate != "" {
				lines = append(lines, "bookmarked: "+addDate)
			}

			ogp, err := fetchOGPInfo(url)
			if err != nil {
				fmt.Println("Error fetching OGP info for", url, ":", err)
			} else {
				if ogp.Image != "" {
					lines = append(lines, "["+ogp.Image+"]")
				}
				if ogp.Description != "" {
					lines = append(lines, "> "+ogp.Description)
				}
			}
			lines = append(lines, "")

			page := ScrapboxPage{
				Title: title,
				Lines: lines,
			}
			export.Pages = append(export.Pages, page)
			fmt.Println(page)
		}
	}

	jsonData, err := json.Marshal(export)
	if err != nil {
		panic(err)
	}
	// Only report success once the file has actually been written.
	if err := os.WriteFile("export_to_scrapbox.json", jsonData, 0644); err != nil {
		panic(err)
	}
	fmt.Println("Export completed.")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment