Skip to content

Instantly share code, notes, and snippets.

@cassava
Created July 13, 2014 15:34
Show Gist options
  • Save cassava/07744544fa0f1083b2a9 to your computer and use it in GitHub Desktop.
Save cassava/07744544fa0f1083b2a9 to your computer and use it in GitHub Desktop.
Matching lines in a text via three letter substrings
Afghanistan
Albania
Algeria
Andorra
Angola
Antigua and Barbuda
Argentina
Armenia
Australia
Austria
Azerbaijan
Bahamas
Bahrain
Bangladesh
Barbados
Belarus
Belgium
Belize
Benin
Bhutan
Bolivia
Bosnia and Herzegovina
Botswana
Brazil
Brunei
Bulgaria
Burkina Faso
Burundi
Cabo Verde
Cambodia
Cameroon
Canada
Central African Republic
Chad
Chile
China
Colombia
Comoros
Costa Rica
Cote d'Ivoire
Country
Croatia
Cuba
Cyprus
Czech Republic
Dem. Rep. of the Congo
Denmark
Djibouti
Dominica
Dominican Republic
Ecuador
Egypt
El Salvador
Equatorial Guinea
Eritrea
Estonia
Ethiopia
Fiji
Finland
France
Gabon
Georgia
Germany
Ghana
Greece
Grenada
Guatemala
Guinea
Guinea-Bissau
Guyana
Haiti
Honduras
Hungary
Iceland
India
Indonesia
Iran
Iraq
Ireland
Israel
Italy
Country
Jamaica
Japan
Jordan
Kazakhstan
Kenya
Kiribati
Kosovo
Kuwait
Kyrgyzstan
Laos
Latvia
Lebanon
Lesotho
Liberia
Libya
Liechtenstein
Lithuania
Luxembourg
Macedonia
Madagascar
Malawi
Malaysia
Maldives
Mali
Malta
Marshall Islands
Mauritania
Mauritius
Mexico
Micronesia
Moldova
Monaco
Mongolia
Montenegro
Morocco
Mozambique
Myanmar
Namibia
Nauru
Nepal
Country
Netherlands
New Zealand
Nicaragua
Niger
Nigeria
North Korea
Norway
Oman
Pakistan
Palau
Palestine
Panama
Papua New Guinea
Paraguay
Peru
Philippines
Poland
Portugal
Qatar
Rep. of the Congo
Romania
Russia
Rwanda
Samoa
San Marino
São Tomé and Príncipe
Saudi Arabia
Senegal
Serbia
Seychelles
Sierra Leone
Singapore
Slovakia
Slovenia
Solomon Islands
Somalia
South Africa
South Korea
South Sudan
Spain
Country
Sri Lanka
St. Kitts and Nevis
St. Lucia
St. Vincent & The Grenadines
Sudan
Suriname
Swaziland
Sweden
Switzerland
Syria
Taiwan
Tajikistan
Tanzania
Thailand
The Gambia
Timor-Leste
Togo
Tonga
Trinidad and Tobago
Tunisia
Turkey
Turkmenistan
Tuvalu
Uganda
Ukraine
United Arab Emirates
United Kingdom
United States of America
Uruguay
Uzbekistan
Vanuatu
Vatican City
Venezuela
Vietnam
Yemen
Zambia
Zimbabwe
// Copyright (c) 2014 Ben Morgan <neembi@gmail.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
// the Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
// This program reads a list of lines from a text file and then tries to match
// those lines against three-letter substrings, hence the dumb name substrings.go.
package main
import (
"bufio"
"fmt"
"io"
"os"
"sort"
"strings"
)
// main parses the command line, opens the list file and the optional
// substrings file, and hands both to match.
//
// Usage: <program> <list> [substrings]
//
// When no substrings file is given (or it is the empty string), substrings
// are read interactively from standard input. Any failure is reported via
// die, which terminates the process with exit status 1.
func main() {
	args := os.Args[1:]
	if len(args) < 1 || len(args) > 2 {
		fmt.Printf("Usage: %s <list> [substrings]\n", os.Args[0])
		os.Exit(1)
	}

	listfile := args[0]
	inputfile := ""
	if len(args) == 2 {
		inputfile = args[1]
	}

	file, err := os.Open(listfile)
	if err != nil {
		die(err)
	}
	defer file.Close()

	// Default to interactive mode; switch to the file only when one was named.
	input := os.Stdin
	stdin := inputfile == ""
	if !stdin {
		input, err = os.Open(inputfile)
		if err != nil {
			die(err)
		}
		defer input.Close()
	}

	if err = match(file, input, stdin); err != nil {
		die(err)
	}
}
// match loads the candidate lines from inlist and then reads one query per
// line from input, reporting for each query how many list entries contain it.
//
// Every non-empty line of inlist becomes a list entry. Each line of input is
// treated as a query: it must be exactly three characters long (counted in
// runes, so non-ASCII queries such as "são" are accepted) and must not have a
// space as its middle character; other queries are reported as ignored.
// Matching is case-insensitive. For each query the output shows "n/m", where
// m is the number of list entries containing the query and n is how many of
// those had not been matched by any earlier query.
//
// When stdin is true the function behaves interactively: it prints a "> "
// prompt before each read and only prints a result line when something
// matched. Otherwise it prints one result line per input line, including
// empty and ignored ones.
//
// After the input is exhausted a summary is printed with the number of
// matched entries and a sorted listing of the entries that were never matched.
//
// It returns the first error encountered while reading either inlist or
// input; in that case the summary is not printed.
func match(inlist io.Reader, input io.Reader, stdin bool) error {
	var list []string
	matched := make(map[string]bool)
	err := foreachLine(inlist, true, func(s string) error {
		list = append(list, s)
		matched[s] = false
		return nil
	})
	if err != nil {
		return err
	}

	// Lower-case every entry once up front instead of once per query.
	lowered := make([]string, len(list))
	for i, item := range list {
		lowered[i] = strings.ToLower(item)
	}

	if stdin {
		fmt.Print("> ")
	}
	err = foreachLine(input, false, func(s string) error {
		var n, m int

		// Validate in runes rather than bytes so that multi-byte characters
		// count as one letter each.
		runes := []rune(s)
		switch {
		case len(runes) == 0:
			// Blank line: nothing to match.
		case len(runes) != 3:
			fmt.Printf("Ignoring %q: wrong length\n", s)
		case runes[1] == ' ':
			// The line is already trimmed, so only the middle character can
			// be a space.
			fmt.Printf("Ignoring %q: contains space\n", s)
		default:
			s = strings.ToLower(s)
			for i, item := range list {
				if !strings.Contains(lowered[i], s) {
					continue
				}
				m++
				if !matched[item] {
					matched[item] = true
					n++
				}
			}
		}

		if stdin {
			if m > 0 {
				fmt.Printf("matched %d/%d\n", n, m)
			}
			fmt.Print("> ")
		} else {
			fmt.Printf("%q matched %d/%d\n", s, n, m)
		}
		return nil
	})
	if err != nil {
		// Previously this error was silently discarded.
		return err
	}

	var n int
	var um []string
	for k, v := range matched {
		if v {
			n++
		} else {
			um = append(um, k)
		}
	}
	fmt.Printf("\n\n%d from %d lines matched.\n", n, len(list))
	fmt.Println("Not matched were:")
	sort.Strings(um)
	for _, item := range um {
		fmt.Println(" ", item)
	}
	return nil
}
// die reports err on standard output, prefixed with "Error:", and terminates
// the process with exit status 1. It never returns.
func die(err error) {
	fmt.Fprintln(os.Stdout, "Error:", err)
	os.Exit(1)
}
// foreachLine reads r line by line, strips leading and trailing whitespace
// from each line, and passes the result to f.
//
// When skip is true, lines that are empty after trimming are not passed to f;
// when skip is false, f is called for them with the empty string. A final
// line that lacks a trailing newline is still processed.
//
// Iteration stops at the first non-nil error returned by f, which is returned
// unchanged. Reaching the end of r ends the loop and yields nil; any other
// read error that arrives without data is returned as is.
func foreachLine(r io.Reader, skip bool, f func(string) error) error {
	reader := bufio.NewReader(r)
	for {
		raw, readErr := reader.ReadString('\n')
		if raw == "" && readErr != nil {
			if readErr != io.EOF {
				return readErr
			}
			return nil
		}

		text := strings.TrimSpace(raw)
		if text == "" && skip {
			continue
		}
		if cbErr := f(text); cbErr != nil {
			return cbErr
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment