Skip to content

Instantly share code, notes, and snippets.

@raccy
Last active August 13, 2018 06:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save raccy/3ccc6f1a2ca7ca60ef7c85e17875ecbd to your computer and use it in GitHub Desktop.
Save raccy/3ccc6f1a2ca7ca60ef7c85e17875ecbd to your computer and use it in GitHub Desktop.
ビット列検索
data.txt
result.txt
result_*.txt
pf_*.txt
*.o
# ビット列検索
テスト用データは "make_data.rb" をRubyで実行すれば作れます。"bitsearch.rb" はRubyを用いた正規表現での実装です。
AVX2対応のCPUを使用してください。128bitなSSE版はそのうち。AVX-512は環境を持っていないので保留。
WSLなUbuntuのGCCでのみ動作確認しています。Mingw-w64では`aligned_alloc`が無いっぽい。VC++でするには色々書き換えが必要かも。
使い方
```
$ ruby make_data.rb > data.txt
$ make
$ ./test.sh
```
手元ではAVX2版が最速でしたが、long longとさほど変わりませんでした。AVX2でシフトした配列を作る処理をAVX2のシフト関数を用いれば、もう少し速くなるかも知れません。
/**
* ビット列検索プログラム main部分
*/
// common
#include "bitsearch.h"
#include <stdio.h>
static int search(const unsigned char *target_bits, int target_size,
const unsigned char *search_bits, int search_size,
const unsigned char *search_mask);
int main(void)
{
int result = bitsearch_inout(stdin, stdout, search);
if (result < 0) {
fprintf(stderr, "error occured: %d\n", -result);
return -result;
}
fprintf(stderr, "success match: %d\n", result);
return 0;
}
/**
* ビット列検索プログラム あらかじめ用意 本体部分
* bitint_tをtypedefしておくこと
*/
#ifndef BITINT_DEFINED
#define BITINT_DEFINED
typedef unsigned int bitint_t;
#endif
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#define BITINT_BIT ((int)(CHAR_BIT * sizeof(bitint_t)))
static int search_bitint(const bitint_t *target_bits, int target_size,
const bitint_t *search_bits, int search_size,
const bitint_t *search_mask);
static bool check_bitint(const bitint_t *target_bits,
const bitint_t *search_bits, int search_size,
const bitint_t *search_mask);
static bitint_t *create_shift(const bitint_t *bits, int size);
static int search(const unsigned char *target_bits, int target_size,
const unsigned char *search_bits, int search_size,
const unsigned char *search_mask)
{
return search_bitint((const bitint_t *)target_bits, target_size,
(const bitint_t *)search_bits, search_size,
(const bitint_t *)search_mask);
}
static int search_bitint(const bitint_t *target_bits, int target_size,
const bitint_t *search_bits, int search_size,
const bitint_t *search_mask)
{
const bitint_t *bits_list[BITINT_BIT][2];
bits_list[0][0] = search_bits;
bits_list[0][1] = search_mask;
for (int i = 1; i < BITINT_BIT; i++) {
bits_list[i][0] =
create_shift(bits_list[i - 1][0], search_size + i - 1);
bits_list[i][1] =
create_shift(bits_list[i - 1][1], search_size + i - 1);
if (bits_list[i][0] == NULL || bits_list[i][1] == NULL)
return -2;
}
int result = -1;
for (int i = 0; i < target_size - search_size + 1; i++) {
int offset = i % BITINT_BIT;
if (check_bitint(target_bits + i / BITINT_BIT,
bits_list[offset][0], search_size + offset,
bits_list[offset][1])) {
result = i;
break;
}
}
for (int i = 1; i < BITINT_BIT; i++) {
free((void *)bits_list[i][0]);
free((void *)bits_list[i][1]);
}
return result;
}
static bool check_bitint(const bitint_t *target_bits,
const bitint_t *search_bits, int search_size,
const bitint_t *search_mask)
{
for (int i = 0; i < search_size / BITINT_BIT; i++) {
if ((*target_bits++ ^ *search_bits++) & *search_mask++) {
return false;
}
}
if (search_size % BITINT_BIT) {
if ((*target_bits ^ *search_bits) & *search_mask) {
return false;
}
}
return true;
}
static bitint_t *create_shift(const bitint_t *bits, int size)
{
bitint_t *new_bits =
(bitint_t *)calloc((size + 1) / BITINT_BIT + 1, sizeof(bitint_t));
if (new_bits == NULL)
return NULL;
bitint_t *dist_bits = new_bits;
*dist_bits++ |= (*bits << 1);
for (int i = 1; i < (size + 1) / BITINT_BIT; i++) {
*dist_bits |= *bits++ >> (BITINT_BIT - 1);
*dist_bits++ |= *bits << 1;
}
int remain = (size + 1) % BITINT_BIT;
if (remain) {
if (remain <= 1) {
*dist_bits |= *bits >> (BITINT_BIT - 1);
} else {
*dist_bits |= *bits++ >> (BITINT_BIT - 1);
*dist_bits |= *bits << 1;
}
}
return new_bits;
}
/**
* ビット列検索プログラム シフトしていくだけ 本体部分
* bitint_tをtypedefしておくこと
*/
#ifndef BITINT_DEFINED
#define BITINT_DEFINED
typedef unsigned int bitint_t;
#endif
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#define BITINT_BIT ((int)(CHAR_BIT * sizeof(bitint_t)))
static int search_bitint(const bitint_t *target_bits, int target_size,
const bitint_t *search_bits, int search_size,
const bitint_t *search_mask);
static bool check_bitint(const bitint_t *target_bits,
const bitint_t *search_bits, int search_size,
const bitint_t *search_mask);
static bool check_bitint_offset(const bitint_t *target_bits,
const bitint_t *search_bits, int search_size,
const bitint_t *search_mask, int offset);
static int search(const unsigned char *target_bits, int target_size,
const unsigned char *search_bits, int search_size,
const unsigned char *search_mask)
{
return search_bitint((const bitint_t *)target_bits, target_size,
(const bitint_t *)search_bits, search_size,
(const bitint_t *)search_mask);
}
static int search_bitint(const bitint_t *target_bits, int target_size,
const bitint_t *search_bits, int search_size,
const bitint_t *search_mask)
{
for (int i = 0; i < target_size - search_size + 1; i++) {
if (check_bitint_offset(target_bits + i / BITINT_BIT,
search_bits, search_size, search_mask,
i % BITINT_BIT)) {
return i;
}
}
return -1;
}
static bool check_bitint(const bitint_t *target_bits,
const bitint_t *search_bits, int search_size,
const bitint_t *search_mask)
{
for (int i = 0; i < search_size / BITINT_BIT; i++) {
if ((*target_bits++ ^ *search_bits++) & *search_mask++) {
return false;
}
}
if (search_size % BITINT_BIT) {
if ((*target_bits ^ *search_bits) & *search_mask) {
return false;
}
}
return true;
}
static bool check_bitint_offset(const bitint_t *target_bits,
const bitint_t *search_bits, int search_size,
const bitint_t *search_mask, int offset)
{
if (offset == 0)
return check_bitint(target_bits, search_bits, search_size,
search_mask);
if ((*target_bits++ ^ (*search_bits << offset)) &
(bitint_t)(*search_mask << offset)) {
return false;
}
for (int i = 1; i < (search_size + offset) / BITINT_BIT; i++) {
if ((*target_bits ^ (*search_bits++ >> (BITINT_BIT - offset))) &
(*search_mask++ >> (BITINT_BIT - offset))) {
return false;
}
if ((*target_bits++ ^ (*search_bits << offset)) &
(bitint_t)(*search_mask << offset)) {
return false;
}
}
int remain = (search_size + offset) % BITINT_BIT;
if (remain) {
if (remain <= offset) {
if ((*target_bits ^
(*search_bits >> (BITINT_BIT - offset))) &
(*search_mask >> (BITINT_BIT - offset))) {
return false;
}
} else {
if ((*target_bits ^
(*search_bits++ >> (BITINT_BIT - offset))) &
(*search_mask++ >> (BITINT_BIT - offset))) {
return false;
}
if ((*target_bits ^ (*search_bits << offset)) &
(bitint_t)(*search_mask << offset)) {
return false;
}
}
}
return true;
}
/**
* ビット列検索プログラム メイン処理用
* リトルエンディアンとみなし、下位バイトからの配列とする。
*/
#include "bitsearch.h"
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static inline int skip_ws(FILE *file)
{
while (true) {
int c = getc(file);
switch (c) {
case EOF:
return EOF;
case '\r':
case '\n':
case '\t':
case '\v':
case ' ':
break;
default:
ungetc(c, stdin);
return c;
}
}
return -1;
}
int bitsearch_inout(FILE *in, FILE *out, bitsearch_func func)
{
int target_size;
if (fscanf(in, "%d", &target_size) != 1)
exit(1);
skip_ws(stdin);
unsigned char *target_bits = (unsigned char *)aligned_alloc(
BITSEARCH_ALIGN,
(target_size / (BITSEARCH_ALIGN * CHAR_BIT) + 1) * BITSEARCH_ALIGN);
if (target_bits == NULL)
exit(1);
memset((void *)target_bits, 0,
(target_size / (BITSEARCH_ALIGN * CHAR_BIT) + 1) *
BITSEARCH_ALIGN);
for (int i = 0; i < target_size; i++) {
int c = getc(in);
switch (c) {
case '0':
break;
case '1':
target_bits[i / CHAR_BIT] |= 1 << (i % CHAR_BIT);
break;
default:
exit(1);
}
}
skip_ws(stdin);
// print_bits(target_bits, target_size);
int word_num;
if (fscanf(in, "%d", &word_num) != 1)
exit(1);
int match_count = 0;
for (int i = 0; i < word_num; i++) {
int search_size;
if (fscanf(in, "%d", &search_size) != 1)
exit(1);
skip_ws(stdin);
unsigned char *search_bits = (unsigned char *)aligned_alloc(
BITSEARCH_ALIGN,
(search_size / (BITSEARCH_ALIGN * CHAR_BIT) + 1) *
BITSEARCH_ALIGN);
if (search_bits == NULL)
exit(1);
memset((void *)search_bits, 0,
(search_size / (BITSEARCH_ALIGN * CHAR_BIT) + 1) *
BITSEARCH_ALIGN);
unsigned char *search_mask = (unsigned char *)aligned_alloc(
BITSEARCH_ALIGN,
(search_size / (BITSEARCH_ALIGN * CHAR_BIT) + 1) *
BITSEARCH_ALIGN);
if (search_mask == NULL)
exit(1);
memset((void *)search_mask, 0,
(search_size / (BITSEARCH_ALIGN * CHAR_BIT) + 1) *
BITSEARCH_ALIGN);
for (int i = 0; i < search_size; i++) {
int c = getc(in);
switch (c) {
case '0':
search_mask[i / CHAR_BIT] |= 1
<< (i % CHAR_BIT);
break;
case '1':
search_bits[i / CHAR_BIT] |= 1
<< (i % CHAR_BIT);
search_mask[i / CHAR_BIT] |= 1
<< (i % CHAR_BIT);
break;
case 'x':
break;
default:
exit(1);
}
}
skip_ws(stdin);
// print_bits(search_bits, search_size);
int result = func(target_bits, target_size, search_bits,
search_size, search_mask);
fprintf(out, "%d\n", result);
if (result >= 0)
match_count++;
free(search_bits);
}
free(target_bits);
return match_count;
}
int fprint_bit(FILE *file, unsigned char bit, int size)
{
for (int i = 0; i < size; i++) {
putc(bit & 0x1 ? '1' : '0', file);
bit >>= 1;
}
return size;
}
int fprint_bits(FILE *file, const unsigned char *bits, int size)
{
int count = 0;
for (int i = 0; i < size / CHAR_BIT; i++) {
count += fprint_bit(file, *bits++, CHAR_BIT);
}
if (size % CHAR_BIT) {
count += fprint_bit(file, *bits, size % CHAR_BIT);
}
fprintf(file, "\n");
count++;
return count;
}
#ifndef BITSEARCH_H
#define BITSEARCH_H
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#define BITSEARCH_ALIGN 32 // 256bit
typedef int (*bitsearch_func)(const unsigned char *, int, const unsigned char *,
int, const unsigned char *);
int bitsearch_inout(FILE *in, FILE *out, bitsearch_func func);
int fprint_bit(FILE *, const unsigned char bit, int size);
int fprint_bits(FILE *, const unsigned char *bits, int size);
#endif
#!/bin/env ruby
# frozen_string_literal: true
gets
target = gets.chomp
word_num = gets.chomp.to_i
word_num.times do
_, keyword = gets.split
re = Regexp.compile(keyword.tr('x', '.'))
result = target.index(re)
if result
puts result
else
puts(-1)
end
end
/**
* ビット列検索プログラム AVX2 256bit
*/
#include "_imple_bitsearch_main.h"
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <x86intrin.h>
#define BITINT_BIT ((int)(CHAR_BIT * sizeof(__m256i)))
static int search_bitint(const __m256i *target_bits, int target_size,
const __m256i *search_bits, int search_size,
const __m256i *search_mask);
static bool check_bitint(const __m256i *target_bits, const __m256i *search_bits,
int search_size, const __m256i *search_mask);
static uint64_t *create_shift(const uint64_t *bits, int size);
static int search(const unsigned char *target_bits, int target_size,
const unsigned char *search_bits, int search_size,
const unsigned char *search_mask)
{
return search_bitint((const __m256i *)target_bits, target_size,
(const __m256i *)search_bits, search_size,
(const __m256i *)search_mask);
}
static int search_bitint(const __m256i *target_bits, int target_size,
const __m256i *search_bits, int search_size,
const __m256i *search_mask)
{
const __m256i *bits_list[BITINT_BIT][2];
bits_list[0][0] = search_bits;
bits_list[0][1] = search_mask;
for (int i = 1; i < BITINT_BIT; i++) {
bits_list[i][0] = (__m256i *)create_shift(
(const uint64_t *)bits_list[i - 1][0], search_size + i - 1);
bits_list[i][1] = (__m256i *)create_shift(
(const uint64_t *)bits_list[i - 1][1], search_size + i - 1);
if (bits_list[i][0] == NULL || bits_list[i][1] == NULL)
return -2;
}
int result = -1;
for (int i = 0; i < target_size - search_size + 1; i++) {
int offset = i % BITINT_BIT;
if (check_bitint(target_bits + i / BITINT_BIT,
bits_list[offset][0], search_size + offset,
bits_list[offset][1])) {
result = i;
break;
}
}
for (int i = 1; i < BITINT_BIT; i++) {
free((void *)bits_list[i][0]);
free((void *)bits_list[i][1]);
}
return result;
}
static bool check_bitint(const __m256i *target_bits, const __m256i *search_bits,
int search_size, const __m256i *search_mask)
{
for (int i = 0; i < search_size / BITINT_BIT; i++) {
if (!_mm256_testz_si256(
_mm256_xor_si256(*target_bits++, *search_bits++),
*search_mask++)) {
return false;
}
}
if (search_size % BITINT_BIT) {
if (!_mm256_testz_si256(
_mm256_xor_si256(*target_bits, *search_bits),
*search_mask)) {
return false;
}
}
return true;
}
static uint64_t *create_shift(const uint64_t *bits, int size)
{
uint64_t *new_bits = (uint64_t *)aligned_alloc(
BITSEARCH_ALIGN,
((size + 1) / (BITSEARCH_ALIGN * CHAR_BIT) + 1) * BITSEARCH_ALIGN);
if (new_bits == NULL)
exit(1);
memset((void *)new_bits, 0,
((size + 1) / (BITSEARCH_ALIGN * CHAR_BIT) + 1) *
BITSEARCH_ALIGN);
uint64_t *dist_bits = new_bits;
*dist_bits++ |= (*bits << 1);
for (int i = 1; i < (size + 1) / 64; i++) {
*dist_bits |= *bits++ >> (64 - 1);
*dist_bits++ |= *bits << 1;
}
int remain = (size + 1) % 64;
if (remain) {
if (remain <= 1) {
*dist_bits |= *bits >> (64 - 1);
} else {
*dist_bits |= *bits++ >> (64 - 1);
*dist_bits |= *bits << 1;
}
}
return new_bits;
}
/**
* ビット列検索プログラム char
*/
#include "_imple_bitsearch_main.h"
// 符号無しであることが必須
#define BITINT_DEFINED
typedef unsigned char bitint_t;
#include "_imple_bitsearch_prep.h"
/**
* ビット列検索プログラム int
*/
#include "_imple_bitsearch_main.h"
// 符号無しであることが必須
#define BITINT_DEFINED
typedef unsigned int bitint_t;
#include "_imple_bitsearch_prep.h"
/**
* ビット列検索プログラム long long
*/
#include "_imple_bitsearch_main.h"
// 符号無しであることが必須
#define BITINT_DEFINED
typedef unsigned long long bitint_t;
#include "_imple_bitsearch_prep.h"
/**
* ビット列検索プログラム char
*/
#include "_imple_bitsearch_main.h"
// 符号無しであることが必須
#define BITINT_DEFINED
typedef unsigned char bitint_t;
#include "_imple_bitsearch_shift.h"
/**
* ビット列検索プログラム int
*/
#include "_imple_bitsearch_main.h"
// 符号無しであることが必須
#define BITINT_DEFINED
typedef unsigned int bitint_t;
#include "_imple_bitsearch_shift.h"
/**
* ビット列検索プログラム long long
*/
#include "_imple_bitsearch_main.h"
// 符号無しであることが必須
#define BITINT_DEFINED
typedef unsigned long long bitint_t;
#include "_imple_bitsearch_shift.h"
#!/bin/env ruby
# frozen_string_literal: true
word_num = 20
max_word_size = 2000
target_size = 100_000_000
wildcard_rate = 0.25
puts target_size
target_size.times { print (rand(100) / 99).to_s }
puts
puts word_num
word_num.times do
size = rand(1..max_word_size)
print size
print ' '
size.times.map do |i|
if i != 0 && i != size - 1 && rand < wildcard_rate
print 'x'
else
print (rand(100) / 99).to_s
end
end
puts
end
CC = gcc
CFLAGS = -Wall -Wextra -O2 -march=native -std=c11 -pedantic
TARGETS = bitsearch_shift_char bitsearch_shift_int bitsearch_shift_ll \
bitsearch_prep_char bitsearch_prep_int bitsearch_prep_ll \
bitsearch_prep_avx2
.PHONY: all
all: $(TARGETS)
.PHONY: clean
clean:
rm $(TARGETS) *.o pf_*.txt result_*.txt
SUFFIXES: .o .c
.c.o:
$(CC) $(CFLAGS) -c $<
bitsearch_shift_char: bitsearch_shift_char.o bitsearch.o
$(CC) $(CFLAGS) -o bitsearch_shift_char $^
bitsearch_shift_int: bitsearch_shift_int.o bitsearch.o
$(CC) $(CFLAGS) -o bitsearch_shift_int $^
bitsearch_shift_ll: bitsearch_shift_ll.o bitsearch.o
$(CC) $(CFLAGS) -o bitsearch_shift_ll $^
bitsearch_prep_char: bitsearch_prep_char.o bitsearch.o
$(CC) $(CFLAGS) -o bitsearch_prep_char $^
bitsearch_prep_int: bitsearch_prep_int.o bitsearch.o
$(CC) $(CFLAGS) -o bitsearch_prep_int $^
bitsearch_prep_ll: bitsearch_prep_ll.o bitsearch.o
$(CC) $(CFLAGS) -o bitsearch_prep_ll $^
bitsearch_prep_avx2: bitsearch_prep_avx2.o bitsearch.o
$(CC) $(CFLAGS) -mavx2 -o bitsearch_prep_avx2 $^
#!/bin/bash
/usr/bin/time -v ./bitsearch_shift_char < data.txt > result_shift_char.txt 2> pf_shift_char.txt
/usr/bin/time -v ./bitsearch_shift_int < data.txt > result_shift_int.txt 2> pf_shift_int.txt
/usr/bin/time -v ./bitsearch_shift_ll < data.txt > result_shift_ll.txt 2> pf_shift_ll.txt
/usr/bin/time -v ./bitsearch_prep_char < data.txt > result_prep_char.txt 2> pf_prep_char.txt
/usr/bin/time -v ./bitsearch_prep_int < data.txt > result_prep_int.txt 2> pf_prep_int.txt
/usr/bin/time -v ./bitsearch_prep_ll < data.txt > result_prep_ll.txt 2> pf_prep_ll.txt
/usr/bin/time -v ./bitsearch_prep_avx2 < data.txt > result_prep_avx2.txt 2> pf_prep_avx2.txt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment