Skip to content

Instantly share code, notes, and snippets.

@qxxt
Last active January 12, 2022 02:00
Show Gist options
  • Save qxxt/f43af14081c00d22c2ed1848c1c2627b to your computer and use it in GitHub Desktop.
Save qxxt/f43af14081c00d22c2ed1848c1c2627b to your computer and use it in GitHub Desktop.
Parsing hosts Golang
// BSD Zero Clause License
//
// Copyright (c) 2022 qxxt
//
// Permission to use, copy, modify, and/or distribute this software for
// any purpose with or without fee is hereby granted.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
// WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
// AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
// CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
// OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
// NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
//
// Note:
// ParseHostsLine() is much slower than ParseHostsByte(), and
// ParseHostsByte() has no slice capacity overhead.
// ParseHostsLine() is kept only to validate the results of
// ParseHostsByte(), because the byte-level parser is hard to debug.
package main
import (
"bytes"
"log"
"reflect"
"testing"
)
// HostsItem is one parsed hosts entry: the first field of a line and the
// fields that follow it.
type HostsItem struct {
	// Ip holds the first whitespace-separated field of the line.
	Ip []byte
	// Aliases holds every remaining field on the line that appears before
	// any '#' comment.
	Aliases [][]byte
}
// Fixture configuration.
var (
	// TestFile is the name of an on-disk hosts file. In this file it is only
	// mentioned by the commented-out loader in init.
	TestFile = "hosts.txt"
	// MaxLine is not referenced by any code in this file.
	// NOTE(review): confirm whether it is still needed.
	MaxLine = 20
)

// LineRes and ByteRes receive the output of ParseHostsLine and
// ParseHostsByte. The benchmarks store into them and TestEquality compares
// them, parsing first if a result is still nil.
var LineRes, ByteRes []HostsItem
// TestData is the embedded sample hosts content that the benchmarks and
// TestEquality feed to both parsers. It contains full-line comments,
// trailing comments with and without a preceding space, and lines with
// more than one alias.
var TestData = []byte(`
# [zapr.in]
127.0.0.1 appmm.zapr.in
127.0.0.1 sdk.zapr.in #
127.0.0.1 submit.zapr.in#
#hdjjdj
# [zarget.com]
127.0.0.1 zarget.com jdjdjdj djdjjd
127.0.0.1 cdn.zarget.com djjdjd#jdjjd
`)
// init is where the fixture would be loaded from disk. The loader is
// currently commented out, so the embedded TestData is used and the error
// check below never fires.
func init() {
	var loadErr error
	// Disabled loader, kept for reference:
	//TestData, loadErr = os.ReadFile(TestFile)
	if loadErr == nil {
		return
	}
	log.Fatal(loadErr)
}
// BenchmarkLine runs ParseHostsLine over TestData b.N times. Each result is
// stored in the package-level LineRes.
func BenchmarkLine(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		LineRes = ParseHostsLine(TestData)
	}
}
// BenchmarkByte runs ParseHostsByte over TestData b.N times. Each result is
// stored in the package-level ByteRes.
func BenchmarkByte(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		ByteRes = ParseHostsByte(TestData)
	}
}
// TestEquality checks that ParseHostsByte and ParseHostsLine produce the
// same entries for TestData. It stops at the first mismatch in entry count,
// Ip, or Aliases.
func TestEquality(t *testing.T) {
	// Reuse whatever the benchmarks stored; parse only when nothing is there.
	if ByteRes == nil {
		ByteRes = ParseHostsByte(TestData)
	}
	if LineRes == nil {
		LineRes = ParseHostsLine(TestData)
	}

	if nByte, nLine := len(ByteRes), len(LineRes); nByte != nLine {
		t.Fatalf("Unequal Length\nByteRes: %d\nLineRes: %d",
			nByte, nLine)
	}

	sep := []byte(", ")
	for idx := range ByteRes {
		fast, ref := ByteRes[idx], LineRes[idx]

		if !reflect.DeepEqual(fast.Ip, ref.Ip) {
			t.Fatalf("Index: %d\nByteRes.Ip: %q\nLineRes.Ip: %q",
				idx, fast.Ip, ref.Ip)
		}

		if !reflect.DeepEqual(fast.Aliases, ref.Aliases) {
			t.Fatalf("Index: %d\nByteRes.Aliases: %q\nLineRes.Aliases: %q",
				idx, bytes.Join(fast.Aliases, sep),
				bytes.Join(ref.Aliases, sep))
		}
	}
}
// ParseHostsLine is the reference hosts parser. It splits b on '\n' and
// handles each line on its own:
//
//   - empty lines and lines whose first byte is '#' are skipped;
//   - everything from the first '#' onward is dropped;
//   - what remains is split with bytes.Fields, and lines with fewer than two
//     fields are skipped.
//
// The first field becomes Ip and the rest become Aliases. The returned
// fields are sub-slices of b, not copies. The result slice is allocated
// with room for one entry per line, so its capacity may exceed its length.
func ParseHostsLine(b []byte) []HostsItem {
	lines := bytes.Split(b, []byte("\n"))
	items := make([]HostsItem, 0, len(lines))

	for _, line := range lines {
		if len(line) == 0 || line[0] == '#' {
			continue
		}

		// Cut off a trailing comment, if any.
		if hash := bytes.IndexByte(line, '#'); hash >= 0 {
			line = line[:hash]
		}

		fields := bytes.Fields(line)
		if len(fields) < 2 {
			continue
		}

		items = append(items, HostsItem{
			Ip:      fields[0],
			Aliases: fields[1:],
		})
	}
	return items
}
// ParseHostsByte parses hosts-file content in a single pass over b and
// returns one HostsItem per line that has at least two fields before any
// '#' comment. The first field becomes Ip and the rest become Aliases.
//
// Fields are separated by ASCII whitespace (' ', '\t', '\r', '\v', '\f'),
// so CRLF input gives the same fields as ParseHostsLine. A '#' starts a
// comment that runs to the end of the line. A final line without a trailing
// newline is parsed like any other.
//
// The input is never modified. Empty or nil input yields an empty, non-nil
// result. The returned fields are sub-slices of b with capacity clipped to
// their length, so appending to one cannot overwrite neighbouring bytes of
// b. The result slice and each Aliases slice are allocated at exact size.
func ParseHostsByte(b []byte) []HostsItem {
	var (
		comment bool // inside a '#' comment; cleared at end of line
		inField bool // a field start was recorded and its end is pending
		// marks collects alternating start/end offsets of the fields on the
		// current line.
		marks = make([]int, 0, 4)
		// regions holds the marks of every accepted line. The +1 leaves
		// room for a last line that has no trailing newline.
		regions = make([][]int, 0, bytes.Count(b, []byte("\n"))+1)
	)

	// Iterate one position past the end and treat it as a newline. That
	// terminates an unterminated last line without appending to b, which
	// could write into the caller's spare capacity or copy the whole input.
	for i := 0; i <= len(b); i++ {
		c := byte('\n')
		if i < len(b) {
			c = b[i]
		}

		switch c {
		case ' ', '\t', '\r', '\v', '\f':
			if inField {
				marks = append(marks, i)
				inField = false
			}
		case '\n':
			comment = false
			if inField {
				marks = append(marks, i)
				inField = false
			}
			if len(marks) >= 4 {
				// At least an Ip and one alias: keep the line and start a
				// fresh mark slice, since regions now owns this one.
				regions = append(regions, marks)
				marks = make([]int, 0, 4)
			} else {
				// Blank, comment-only or single-field line: reuse the buffer.
				marks = marks[:0]
			}
		case '#':
			comment = true
			if inField {
				marks = append(marks, i)
				inField = false
			}
		default:
			if !comment && !inField {
				marks = append(marks, i)
				inField = true
			}
		}
	}

	res := make([]HostsItem, len(regions))
	for i, m := range regions {
		res[i].Ip = b[m[0]:m[1]:m[1]]
		aliases := make([][]byte, 0, len(m)/2-1)
		for j := 2; j < len(m); j += 2 {
			aliases = append(aliases, b[m[j]:m[j+1]:m[j+1]])
		}
		res[i].Aliases = aliases
	}
	return res
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment