Last active
December 10, 2023 04:27
-
-
Save yashihei/8fdf1895620d391600b5fded59a62189 to your computer and use it in GitHub Desktop.
hatebu_to_scrapbox.go(ほぼChatGPTに書いてもらった)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/json" | |
"fmt" | |
"net/http" | |
"os" | |
"strconv" | |
"time" | |
"golang.org/x/net/html" | |
"golang.org/x/net/html/charset" | |
"golang.org/x/text/transform" | |
) | |
type ScrapboxPage struct { | |
Title string `json:"title"` | |
Lines []string `json:"lines"` | |
} | |
type ScrapboxExport struct { | |
Pages []ScrapboxPage `json:"pages"` | |
} | |
type OGPInfo struct { | |
Title string | |
Description string | |
Image string | |
} | |
func fetchOGPInfo(url string) (*OGPInfo, error) { | |
resp, err := http.Get(url) | |
if err != nil { | |
return nil, err | |
} | |
defer resp.Body.Close() | |
// Content-Typeヘッダーから文字エンコーディングを取得し、変換する | |
utf8Reader, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type")) | |
if err != nil { | |
return nil, err | |
} | |
tokenizer := html.NewTokenizer(transform.NewReader(utf8Reader, transform.Chain())) | |
ogpInfo := &OGPInfo{} | |
for { | |
tokenType := tokenizer.Next() | |
switch tokenType { | |
case html.ErrorToken: | |
return ogpInfo, nil | |
case html.StartTagToken, html.SelfClosingTagToken: | |
token := tokenizer.Token() | |
if token.Data == "meta" { | |
property := "" | |
content := "" | |
for _, attr := range token.Attr { | |
if attr.Key == "property" { | |
property = attr.Val | |
} else if attr.Key == "content" { | |
content = attr.Val | |
} | |
} | |
switch property { | |
case "og:title": | |
ogpInfo.Title = content | |
case "og:description": | |
ogpInfo.Description = content | |
case "og:image": | |
ogpInfo.Image = content | |
} | |
} | |
} | |
} | |
} | |
func parseTokenAttributes(token html.Token) (string, string, string) { | |
url := "" | |
title := "" | |
addDate := "" | |
for _, attr := range token.Attr { | |
if attr.Key == "href" { | |
url = attr.Val | |
} | |
if attr.Key == "add_date" { | |
unixTime, err := strconv.ParseInt(attr.Val, 10, 64) | |
if err != nil { | |
fmt.Println("Error parsing ADD_DATE for", url, ":", err) | |
continue | |
} | |
t := time.Unix(unixTime, 0) | |
addDate = t.Format("2006-01-02") | |
} | |
} | |
return url, title, addDate | |
} | |
func main() { | |
fileName := "yashihei.bookmarks.sample.html" | |
file, err := os.Open(fileName) | |
if err != nil { | |
panic(err) | |
} | |
defer file.Close() | |
tokenizer := html.NewTokenizer(file) | |
var export ScrapboxExport | |
for { | |
tokenType := tokenizer.Next() | |
switch tokenType { | |
case html.ErrorToken: | |
goto Finish | |
case html.StartTagToken, html.SelfClosingTagToken: | |
token := tokenizer.Token() | |
if token.Data == "a" { | |
url, title, addDate := parseTokenAttributes(token) | |
tokenType = tokenizer.Next() | |
if tokenType == html.TextToken { | |
title = tokenizer.Token().Data | |
} | |
lines := []string{title, "[" + url + " " + title + "]"} | |
if addDate != "" { | |
lines = append(lines, "bookmarked: "+addDate) | |
} | |
ogp, err := fetchOGPInfo(url) | |
if err != nil { | |
fmt.Println("Error fetching OGP info for", url, ":", err) | |
} else { | |
if ogp.Image != "" { | |
lines = append(lines, "["+ogp.Image+"]") | |
} | |
if ogp.Description != "" { | |
lines = append(lines, "> "+ogp.Description) | |
} | |
} | |
lines = append(lines, "") | |
page := ScrapboxPage{ | |
Title: title, | |
Lines: lines, | |
} | |
export.Pages = append(export.Pages, page) | |
fmt.Println(page) | |
} | |
} | |
} | |
Finish: | |
jsonData, err := json.Marshal(export) | |
if err != nil { | |
panic(err) | |
} | |
os.WriteFile("export_to_scrapbox.json", jsonData, 0644) | |
fmt.Println("Export completed.") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment