Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@jessedearing
Created October 24, 2016 05:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jessedearing/22b313347747423b4bbf61b7ed73e0a4 to your computer and use it in GitHub Desktop.
Save jessedearing/22b313347747423b4bbf61b7ed73e0a4 to your computer and use it in GitHub Desktop.
package main
import (
	"bufio"
	"database/sql"
	"errors"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"sync"

	_ "github.com/go-sql-driver/mysql"
)
// main rebuilds the words table in the local MySQL "jobs" database and fills
// it from the .txt files matched by the glob below, handing each file to
// processFile in its own goroutine.
//
// Credentials are read from the MYSQL_USER and MYSQL_PWD environment
// variables. Any failure during setup aborts the program via log.Panic.
func main() {
	// sql.Open may only validate its arguments without dialing the server;
	// connection problems then surface on the first statement below.
	dsn := os.Getenv("MYSQL_USER") + ":" + os.Getenv("MYSQL_PWD") + "@tcp(127.0.0.1:3306)/jobs"
	db, err := sql.Open("mysql", dsn)
	if err != nil {
		log.Panic(err)
	}
	defer db.Close()

	// Find all the files we're going to read.
	// NOTE(review): filepath.Glob has no recursive "**" wildcard; "**" behaves
	// like "*", so this pattern matches only .txt files that sit exactly one
	// directory below the working directory. Confirm that is the intent.
	files, err := filepath.Glob("./**/*.txt")
	if err != nil {
		log.Panic(err)
	}

	// Drop the table so we can recreate it.
	if _, err = db.Exec("drop table if exists words"); err != nil {
		log.Panic(err)
	}

	// Recreate the table.
	if _, err = db.Exec("create table words (id bigint unsigned auto_increment primary key, position int, file varchar(80), word varchar(255), key ix_Word (word))"); err != nil {
		log.Panic(err)
	}

	// Load files concurrently, but cap how many are in flight: every worker
	// holds an open file and may hold a database connection, so one goroutine
	// per file without a limit can exhaust file descriptors or server
	// connections when many files match.
	const maxConcurrentFiles = 8
	slots := make(chan struct{}, maxConcurrentFiles)

	var wg sync.WaitGroup
	for _, file := range files {
		wg.Add(1)
		slots <- struct{}{} // blocks until a worker slot is free
		go func(file string) {
			defer func() { <-slots }()
			processFile(file, db, &wg)
		}(file)
	}
	wg.Wait()
}
// wordPattern matches the first run of word characters (ASCII letters, digits
// and underscore) in a token. It is compiled once at package scope instead of
// once per token.
var wordPattern = regexp.MustCompile(`\w+`)

// processFile reads file as a sequence of whitespace-separated tokens and
// inserts one row into the words table for every token that contains at least
// one word character.
//
// For each such token the first match of wordPattern is stored together with
// the file's base name and a zero-based position. The position counts only
// inserted words; tokens made up entirely of other characters are skipped and
// do not advance it.
//
// wg.Done is called when the function returns. Failing to open the file or to
// insert a row aborts via log.Panic. A read error other than end-of-file is
// logged and ends processing of this file.
func processFile(file string, db *sql.DB, wg *sync.WaitGroup) {
	defer wg.Done()

	fileh, err := os.Open(file)
	if err != nil {
		log.Panic(err)
	}
	defer fileh.Close()

	// Fscan reads rune by rune. A bufio.Reader avoids one read call per byte
	// and provides UnreadRune, so the rune Fscan peeks past each token is not
	// lost between calls.
	reader := bufio.NewReader(fileh)
	filename := filepath.Base(file)

	var rawString string
	position := 0
	for {
		// Fscan reads the next run of non-whitespace characters.
		if _, err := fmt.Fscan(reader, &rawString); err != nil {
			if !errors.Is(err, io.EOF) {
				log.Printf("reading %s: %v", file, err)
			}
			break
		}

		// Strip surrounding junk; an empty result means the token held no
		// word characters at all.
		word := wordPattern.FindString(rawString)
		if word == "" {
			continue
		}

		if _, err := db.Exec("insert into words (position, file, word) values (?,?,?)", position, filename, word); err != nil {
			log.Panic(err)
		}
		// Increment so we can track the position the words are in.
		position++
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment