Skip to content

Instantly share code, notes, and snippets.

@Shimi9999
Created October 13, 2020 08:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Shimi9999/c81b7cb9c0ddaf419244e5167838452b to your computer and use it in GitHub Desktop.
Save Shimi9999/c81b7cb9c0ddaf419244e5167838452b to your computer and use it in GitHub Desktop.
package main
import (
"bufio"
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/saintfish/chardet"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
// main is the command-line entry point. It accepts at most one positional
// argument, the directory to scan (defaulting to "./"), and exits with status 1
// on a usage error, on a path that cannot be stat'ed or is not a directory, or
// when findInDirectory returns an error.
func main() {
	flag.Parse()

	args := flag.Args()
	root := "./"
	switch {
	case len(args) > 1:
		fmt.Println("Usage: scanreadme <dirpath>")
		os.Exit(1)
	case len(args) == 1:
		root = args[0]
	}

	info, statErr := os.Stat(root)
	if statErr != nil {
		fmt.Println("Path is wrong: ", statErr.Error())
		os.Exit(1)
	}
	if !info.IsDir() {
		fmt.Println("The entered path is not directory")
		os.Exit(1)
	}

	if scanErr := findInDirectory(root); scanErr != nil {
		fmt.Println(scanErr.Error())
		os.Exit(1)
	}
}
// findInDirectory walks dirPath recursively and passes every file accepted by
// isTextFile to readFile.
//
// It returns an error when dirPath, or any directory below it, cannot be
// listed. A failure on an individual file does not stop the walk: it is
// reported on standard error and the remaining entries are still processed.
func findInDirectory(dirPath string) error {
	files, err := ioutil.ReadDir(dirPath)
	if err != nil {
		return fmt.Errorf("reading directory %q: %w", dirPath, err)
	}
	for _, f := range files {
		filePath := filepath.Join(dirPath, f.Name())
		if f.IsDir() {
			if err := findInDirectory(filePath); err != nil {
				return err
			}
			continue
		}
		if !isTextFile(filePath) {
			continue
		}
		// One unreadable or undecodable file should not abort the whole
		// scan, but it must not disappear silently either.
		if err := readFile(filePath); err != nil {
			fmt.Fprintf(os.Stderr, "skipped %s: %v\n", filePath, err)
		}
	}
	return nil
}
// licenseKeywordRe matches any line containing one of the license-related
// keywords (Japanese or English) this tool looks for. It is compiled once at
// package initialization rather than once per scanned line.
//
// NOTE(review): the character class lists "2" twice; one of them was
// presumably meant to be the full-width digit — confirm against the intended
// keyword list.
var licenseKeywordRe = regexp.MustCompile(`.*(許可|禁止|[22二]次|配布|流用|改変|無断|アーカイブ|ライセンス|自由に|著作|転載|license|copy|\(c\)|permission|reproduce).*`)

// readFile loads the file at path, decodes it to UTF-8 using the charset
// reported by detectEncoding, and prints every line that matches
// licenseKeywordRe.
//
// Lines are lower-cased before matching, and the lower-cased text is what gets
// printed. Reported line numbers are zero-based (the first line is 0). Nothing
// is printed when no line matches.
//
// It returns an error if the file cannot be read, its encoding cannot be
// detected, decoding fails, or the scanner fails (for example on a line longer
// than maxBufSize bytes).
func readFile(path string) error {
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return err
	}
	charset, err := detectEncoding(data)
	if err != nil {
		// The failure is also printed here so that it stays visible even
		// when the caller discards the returned error.
		fmt.Println("Detect error:", path, err.Error())
		return err
	}
	text, err := decodeText(string(data), charset)
	if err != nil {
		return err
	}

	const (
		initialBufSize = 10000
		maxBufSize     = 1000000
	)
	scanner := bufio.NewScanner(strings.NewReader(text))
	// Raise the scanner's line limit to maxBufSize so long lines (e.g. in
	// HTML files) do not fail with the default token size.
	scanner.Buffer(make([]byte, initialBufSize), maxBufSize)

	type targetLine struct {
		LineNumber int
		Text       string
	}
	var targetLines []targetLine
	for lineNumber := 0; scanner.Scan(); lineNumber++ {
		line := strings.ToLower(scanner.Text())
		if licenseKeywordRe.MatchString(line) {
			targetLines = append(targetLines, targetLine{lineNumber, line})
		}
	}
	if err := scanner.Err(); err != nil {
		// Use a constant format string so a '%' in the underlying message
		// is not interpreted as a formatting verb.
		return fmt.Errorf("file scan error: %w", err)
	}

	if len(targetLines) > 0 {
		fmt.Printf("%s\n", path)
		for _, tl := range targetLines {
			fmt.Printf(" (%d) %s\n", tl.LineNumber, tl.Text)
		}
		fmt.Println()
	}
	return nil
}
// isTextFile reports whether path carries one of the extensions this tool
// scans: .txt, .md, .html or .htm. The comparison itself is done by haveExt.
func isTextFile(path string) bool {
	textExts := []string{".txt", ".md", ".html", ".htm"}
	matched := haveExt(path, &textExts)
	return matched
}
// haveExt reports whether the extension of path, once lower-cased, is equal to
// one of the entries in *exts. Entries are compared as given, so an entry that
// contains upper-case letters never matches.
func haveExt(path string, exts *[]string) bool {
	lowered := strings.ToLower(filepath.Ext(path))
	for i := range *exts {
		if (*exts)[i] == lowered {
			return true
		}
	}
	return false
}
// detectEncoding runs chardet's text detector over bytes and returns the
// charset name of its best guess. When detection fails it returns an empty
// string together with the detector's error.
func detectEncoding(bytes []byte) (string, error) {
	best, err := chardet.NewTextDetector().DetectBest(bytes)
	if err != nil {
		return "", err
	}
	return best.Charset, nil
}
// decodeText converts text, whose bytes are encoded in charset, to UTF-8.
//
// charset is expected to be a name as reported by detectEncoding. Charsets
// without a mapping below (including UTF-8) are returned unchanged. An error
// is returned only when the selected decoder fails.
func decodeText(text string, charset string) (string, error) {
	var enc encoding.Encoding
	switch charset {
	case "Shift_JIS", "windows-1251", "windows-1252", "ISO-8859-5":
		// The single-byte Western/Cyrillic names are decoded as Shift_JIS
		// as well — presumably because the detector tends to report them
		// for Shift_JIS input; verify before changing this list.
		enc = japanese.ShiftJIS
	case "EUC-JP":
		enc = japanese.EUCJP
	case "ISO-2022-JP":
		enc = japanese.ISO2022JP
	case "UTF-16LE":
		enc = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
	case "UTF-16BE":
		enc = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
	}
	if enc == nil {
		return text, nil
	}

	decoded, _, err := transform.String(enc.NewDecoder(), text)
	if err != nil {
		return "", err
	}
	return decoded, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment