Skip to content

Instantly share code, notes, and snippets.

@rafaelhenrique
Last active October 20, 2018 00:59
Show Gist options
  • Save rafaelhenrique/1d064ab71ee7d3de5cd04be1f4582e50 to your computer and use it in GitHub Desktop.
Save rafaelhenrique/1d064ab71ee7d3de5cd04be1f4582e50 to your computer and use it in GitHub Desktop.
Does Go improve Python?
import re
def find_cnpj_using_in(content, company_name):
    """Look up a CNPJ with a plain substring test, one line at a time.

    ``content`` is split on ``'\\n'`` and the first line that contains
    ``company_name`` is selected. Characters 2 through 15 of that line
    (the slice ``[2:16]``) are converted to ``int`` and returned.

    Raises:
        StopIteration: if no line contains ``company_name``.
        ValueError: if the sliced text is not a valid integer literal.
    """
    rows = content.split('\n')
    # next() without a default is what raises StopIteration when nothing matches.
    first_hit = next(filter(lambda row: company_name in row, rows))
    return int(first_hit[2:16])
def find_cnpj_using_search(content, company_name):
    """Look up a CNPJ with a single ``re.search`` over the whole content.

    The pattern is: two digits, then 14 captured digits, then any text,
    then ``company_name``, all on one line (``.`` does not match a newline).
    The captured 14 digits of the leftmost match are returned as a string.

    ``company_name`` is matched literally. It is passed through
    ``re.escape`` so that characters such as ``(``, ``.`` or ``+`` in a
    company name are not interpreted as regex syntax.

    Raises:
        AttributeError: if nothing matches (``re.search`` returns ``None``).
    """
    expression = r'\d{{2}}(\d{{14}}).*{}.*'.format(re.escape(company_name))
    return re.search(expression, content).group(1)
def find_cnpj_using_findall(content, company_name):
    """Look up a CNPJ with a compiled pattern and ``findall``.

    The pattern is: two digits, then 14 captured digits, then any text,
    then ``company_name``, all on one line. Every match in ``content`` is
    collected and the captured digits of the first one are returned as a
    string.

    ``company_name`` is matched literally. It is passed through
    ``re.escape``; without that, parentheses in a name would add capture
    groups (changing what ``findall`` returns) and other metacharacters
    would alter or break the pattern.

    Raises:
        IndexError: if nothing matches (``findall`` returns an empty list).
    """
    expression = r'\d{{2}}(\d{{14}}).*{}.*'.format(re.escape(company_name))
    pattern = re.compile(expression)
    return pattern.findall(content)[0]
if __name__ == '__main__':
    # The real CNPJ data file is too slow to process, too large and not
    # versioned, so the reduced sample below is read instead. To run against
    # the real file, swap in:
    #
    # with open('./data/F.K03200UF.D71214PR', 'r', encoding='iso8859') as fp:
    #     content = fp.read()
    with open('./data/MINIMAL', 'r', encoding='iso8859') as fp:
        content = fp.read()

    company_name = 'OLIST SERVICOS DIGITAIS LTDA'

    # Run each lookup strategy on the same input, printing its result as it
    # finishes, in the same order as before.
    strategies = (
        ("find_cnpj_using_in result: ", find_cnpj_using_in),
        ("find_cnpj_using_search result: ", find_cnpj_using_search),
        ("find_cnpj_using_findall result: ", find_cnpj_using_findall),
    )
    for label, strategy in strategies:
        print(label, strategy(content, company_name))
package main
import (
"fmt"
"io/ioutil"
"regexp"
"strconv"
"strings"
)
import "C"
// FindCnpjByRegex returns the 14-digit CNPJ captured by the leftmost regex
// match for company in content, or 0 when there is no usable match.
//
// A match is two digits, then the 14 captured CNPJ digits, then any text,
// then company, all on one line ("." does not match a newline). company is
// matched literally: it is escaped with regexp.QuoteMeta, so names that
// contain regex metacharacters such as "(", "+" or "." do not alter the
// pattern.
//
// The function is exported through cgo, so it must not panic: a pattern
// that still fails to compile, or digits that cannot be parsed, yield 0.
//
//export FindCnpjByRegex
func FindCnpjByRegex(content, company string) (cnpj int) {
	pattern, err := regexp.Compile(`\d{2}(\d{14}).*` + regexp.QuoteMeta(company) + `.*`)
	if err != nil {
		return 0
	}

	match := pattern.FindStringSubmatch(content)
	if len(match) < 2 {
		return 0
	}

	cnpj, err = strconv.Atoi(match[1])
	if err != nil {
		return 0
	}
	return cnpj
}
// FindCnpjByContains scans content line by line (split on "\n") and returns
// the CNPJ of the first usable line that contains company, or 0 when none
// is found.
//
// The CNPJ is read from bytes 2 through 15 of the line (the slice [2:16]).
// Lines that contain company but are shorter than 16 bytes cannot hold that
// field and are skipped; slicing them would panic, which must not happen in
// a cgo-exported function. If the field of the selected line is not a valid
// integer, 0 is returned.
//
//export FindCnpjByContains
func FindCnpjByContains(content, company string) (cnpj int) {
	const cnpjStart, cnpjEnd = 2, 16

	for _, line := range strings.Split(content, "\n") {
		if !strings.Contains(line, company) {
			continue
		}
		if len(line) < cnpjEnd {
			// Too short to carry the CNPJ field.
			continue
		}
		n, err := strconv.Atoi(line[cnpjStart:cnpjEnd])
		if err != nil {
			return 0
		}
		return n
	}
	return 0
}
// main reads the sample data file and prints the CNPJ that each lookup
// strategy finds for a fixed company name.
func main() {
	// Real data about CNPJ - too slow, too large and not versioned
	//
	// file, err := ioutil.ReadFile("./data/F.K03200UF.D71214PR")
	file, err := ioutil.ReadFile("./data/MINIMAL")
	if err != nil {
		fmt.Printf("Error to open file. Error: %v\n", err.Error())
		// Stop here: with no content both lookups would only report 0,
		// which would look like a genuine "not found" result.
		return
	}

	const company = "OLIST SERVICOS DIGITAIS LTDA"
	content := string(file)

	cnpj := FindCnpjByRegex(content, company)
	fmt.Printf("FindCnpjByRegex result: %d\n", cnpj)

	cnpj = FindCnpjByContains(content, company)
	fmt.Printf("FindCnpjByContains result: %d\n", cnpj)
}
from ctypes import Structure, c_char_p, c_longlong, cdll
class GoString(Structure):
    """ctypes structure used to pass a string argument to the Go library.

    Fields, in order:
        p: ``c_char_p`` pointing at the string's bytes.
        n: ``c_longlong`` holding the length passed alongside the pointer.
    """

    _fields_ = [
        ("p", c_char_p),
        ("n", c_longlong),
    ]
# Load the shared library from the current directory (presumably built from
# the Go code that exports the two functions configured below).
gofindcnpj = cdll.LoadLibrary("./gofindcnpj.so")

# Both exported lookups get the same ctypes signature: two GoString
# arguments passed by value and a c_longlong result.
for _export_name in ("FindCnpjByContains", "FindCnpjByRegex"):
    _export = getattr(gofindcnpj, _export_name)
    _export.argtypes = [GoString, GoString]
    _export.restype = c_longlong
del _export_name, _export
if __name__ == '__main__':
    with open('./data/MINIMAL', 'r', encoding='iso8859') as fp:
        content = fp.read()
    company_name = 'OLIST SERVICOS DIGITAIS LTDA'

    # The library takes GoString arguments (pointer + length), so the text is
    # encoded to bytes first and the length passed is the byte length. The
    # bytes objects stay referenced by these names for the duration of the
    # calls.
    new_content = bytes(content, 'utf-8')
    new_company_name = bytes(company_name, 'utf-8')
    go_content = GoString(new_content, len(new_content))
    go_company_name = GoString(new_company_name, len(new_company_name))

    # Print each result; previously the lookup result was computed and then
    # discarded, so the script produced no output.
    cnpj = gofindcnpj.FindCnpjByContains(go_content, go_company_name)
    print("FindCnpjByContains result: ", cnpj)

    cnpj = gofindcnpj.FindCnpjByRegex(go_content, go_company_name)
    print("FindCnpjByRegex result: ", cnpj)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment