Blog 2020/5/28
<- previous | index | next ->
After writing a trivial sudoku solver in C yesterday, today I'll use it to benchmark the compilation speed of TCC vs GCC across a variety of processor speeds.
model name : Geode(TM) Integrated Processor by National Semi | |
tcc version 0.9.27 (prerelease) (i386 Linux) | |
gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516 | |
time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
0.11user 0.03system 0:00.15elapsed 95%CPU (0avgtext+0avgdata 2340maxresident)k | |
0inputs+16outputs (0major+236minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
1.78user 0.27system 0:02.09elapsed 98%CPU (0avgtext+0avgdata 18944maxresident)k | |
0inputs+96outputs (0major+3319minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
3.40user 0.32system 0:03.75elapsed 99%CPU (0avgtext+0avgdata 20044maxresident)k | |
0inputs+96outputs (0major+3550minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
6.07user 0.33system 0:06.44elapsed 99%CPU (0avgtext+0avgdata 21920maxresident)k | |
0inputs+104outputs (0major+3804minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
4.18user 0.27system 0:04.54elapsed 97%CPU (0avgtext+0avgdata 21176maxresident)k | |
0inputs+88outputs (0major+3582minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
23.83user 0.40system 0:24.31elapsed 99%CPU (0avgtext+0avgdata 28560maxresident)k | |
0inputs+200outputs (0major+5593minor)pagefaults 0swaps |
model name : Intel(R) Core(TM) i3-2350M CPU @ 2.30GHz | |
tcc version 0.9.27 (x86_64 Linux) | |
gcc (Debian 8.3.0-6) 8.3.0 | |
time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
0.01user 0.00system 0:00.01elapsed 90%CPU (0avgtext+0avgdata 2836maxresident)k | |
0inputs+24outputs (0major+313minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
0.06user 0.02system 0:00.09elapsed 100%CPU (0avgtext+0avgdata 21464maxresident)k | |
0inputs+96outputs (0major+4847minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
0.11user 0.02system 0:00.13elapsed 99%CPU (0avgtext+0avgdata 24224maxresident)k | |
0inputs+88outputs (0major+5086minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
0.19user 0.02system 0:00.21elapsed 100%CPU (0avgtext+0avgdata 26164maxresident)k | |
0inputs+88outputs (0major+5407minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
0.14user 0.02system 0:00.17elapsed 99%CPU (0avgtext+0avgdata 25632maxresident)k | |
0inputs+80outputs (0major+5163minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
0.51user 0.04system 0:00.56elapsed 99%CPU (0avgtext+0avgdata 39968maxresident)k | |
0inputs+240outputs (0major+8583minor)pagefaults 0swaps |
a.out: sudoku.c | |
gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out | |
ccbench: | |
@cat /proc/cpuinfo | grep 'model name' | head -n1 | |
@tcc -v || true | |
@gcc --version | head -n1 | |
@echo | |
@tcc sudoku.c | |
time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
@echo | |
@gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 | |
time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
@echo | |
@gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 | |
time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
@echo | |
@gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 | |
time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
@echo | |
@gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs | |
time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
@echo | |
@gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 | |
time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
clean: | |
rm -f a.out a.out.* | |
.PHONY: ccbench clean |
model name : Intel(R) Atom(TM) CPU N270 @ 1.60GHz | |
tcc version 0.9.27 (i386 Linux) | |
gcc (Debian 8.3.0-6) 8.3.0 | |
time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
0.01user 0.01system 0:00.02elapsed 91%CPU (0avgtext+0avgdata 2592maxresident)k | |
0inputs+16outputs (0major+257minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
0.22user 0.08system 0:00.31elapsed 99%CPU (0avgtext+0avgdata 18864maxresident)k | |
0inputs+104outputs (0major+3475minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
0.45user 0.06system 0:00.52elapsed 99%CPU (0avgtext+0avgdata 21172maxresident)k | |
0inputs+104outputs (0major+3698minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
0.81user 0.06system 0:00.88elapsed 99%CPU (0avgtext+0avgdata 23384maxresident)k | |
0inputs+112outputs (0major+3975minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
0.57user 0.05system 0:00.64elapsed 99%CPU (0avgtext+0avgdata 22876maxresident)k | |
0inputs+96outputs (0major+3766minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
2.22user 0.07system 0:02.30elapsed 99%CPU (0avgtext+0avgdata 29940maxresident)k | |
0inputs+192outputs (0major+5540minor)pagefaults 0swaps |
model name : Geode(TM) Integrated Processor by AMD PCS | |
tcc version 0.9.27 (prerelease) (i386 Linux) | |
gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516 | |
time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
0.06user 0.00system 0:00.06elapsed 89%CPU (0avgtext+0avgdata 1388maxresident)k | |
0inputs+0outputs (0major+403minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
0.92user 0.07system 0:01.02elapsed 97%CPU (0avgtext+0avgdata 10932maxresident)k | |
0inputs+0outputs (0major+6296minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
1.70user 0.06system 0:01.82elapsed 96%CPU (0avgtext+0avgdata 13036maxresident)k | |
0inputs+0outputs (0major+6886minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
2.95user 0.04system 0:03.06elapsed 97%CPU (0avgtext+0avgdata 14768maxresident)k | |
0inputs+0outputs (0major+7333minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
2.00user 0.12system 0:02.17elapsed 97%CPU (0avgtext+0avgdata 14040maxresident)k | |
0inputs+0outputs (0major+7104minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
11.03user 0.14system 0:11.23elapsed 99%CPU (0avgtext+0avgdata 21780maxresident)k | |
0inputs+0outputs (0major+9177minor)pagefaults 0swaps |
model name : Intel(R) Core(TM) i5-4590 CPU @ 3.30GHz | |
tcc version 0.9.27 (x86_64 Linux) | |
gcc (Debian 8.3.0-6) 8.3.0 | |
time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
0.00user 0.00system 0:00.00elapsed 100%CPU (0avgtext+0avgdata 2604maxresident)k | |
0inputs+24outputs (0major+308minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
0.03user 0.00system 0:00.04elapsed 100%CPU (0avgtext+0avgdata 21408maxresident)k | |
0inputs+96outputs (0major+4828minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
0.06user 0.00system 0:00.06elapsed 98%CPU (0avgtext+0avgdata 24068maxresident)k | |
0inputs+88outputs (0major+5078minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
0.09user 0.00system 0:00.10elapsed 100%CPU (0avgtext+0avgdata 26196maxresident)k | |
0inputs+88outputs (0major+5410minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
0.07user 0.00system 0:00.08elapsed 98%CPU (0avgtext+0avgdata 25612maxresident)k | |
0inputs+80outputs (0major+5158minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
0.25user 0.02system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 39944maxresident)k | |
0inputs+240outputs (0major+8574minor)pagefaults 0swaps |
model name : Intel(R) Core(TM)2 CPU 6300 @ 1.86GHz | |
tcc version 0.9.27 (x86_64 Linux) | |
gcc (Debian 8.3.0-6) 8.3.0 | |
time tcc sudoku.c 2>&1 && mv a.out a.out.tcc | |
0.00user 0.00system 0:00.00elapsed 88%CPU (0avgtext+0avgdata 2808maxresident)k | |
0inputs+24outputs (0major+314minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O0 sudoku.c -o a.out.gccO0 2>&1 | |
0.08user 0.02system 0:00.11elapsed 99%CPU (0avgtext+0avgdata 21568maxresident)k | |
0inputs+96outputs (0major+4851minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O1 sudoku.c -o a.out.gccO1 2>&1 | |
0.13user 0.04system 0:00.18elapsed 99%CPU (0avgtext+0avgdata 24296maxresident)k | |
0inputs+88outputs (0major+5112minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O2 sudoku.c -o a.out.gccO2 2>&1 | |
0.25user 0.02system 0:00.28elapsed 99%CPU (0avgtext+0avgdata 26128maxresident)k | |
0inputs+88outputs (0major+5430minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -Os sudoku.c -o a.out.gccOs 2>&1 | |
0.19user 0.01system 0:00.21elapsed 100%CPU (0avgtext+0avgdata 25636maxresident)k | |
0inputs+80outputs (0major+5150minor)pagefaults 0swaps | |
time gcc -std=c89 -Wall -Werror -O3 sudoku.c -o a.out.gccO3 2>&1 | |
0.75user 0.05system 0:00.81elapsed 99%CPU (0avgtext+0avgdata 40160maxresident)k | |
0inputs+240outputs (0major+8605minor)pagefaults 0swaps |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <assert.h> | |
#include <stdbool.h> | |
/* A sudoku solver. */ | |
/* | |
Copyright (c) 2020 Jason Pepas | |
Released under the terms of the MIT license. | |
See https://opensource.org/licenses/MIT | |
*/ | |
/* | |
Board file format: plain text, 9 lines of 9 numbers (zero for empty box). | |
Example: | |
530070000 | |
600195000 | |
098000060 | |
800060003 | |
400803001 | |
700020006 | |
060000280 | |
000419005 | |
000080079 | |
*/ | |
/* | |
Data structures: | |
- A 'box' is a set of the remaining possible solutions for a spot in the grid. | |
This set is represented as a uint16_t bitvector, with the sudoke numbers 1-9 | |
represented as the first 9 bits in the bitvector. | |
- A solved box has only one bit set. | |
- A 'board' is a 2-dimensional array (9x9) of boxes (bitvectors). | |
- A solved board is a 9x9 grid of bitvectors which each have only one item. | |
*/ | |
/* | |
The general approach is to iterate over the board and use logic to reduce | |
the number of possible solutions in each box until the board is solved. | |
Algorithm: | |
- For each row, build a set of the solved numbers in that row, then | |
remove those solutions from the remaining possibilities in each of the | |
unsolved boxes in that row. | |
- Do the same for each column. | |
- Do the same for each 3x3 grid. | |
- Repeat the above process until either the board is solved, or until | |
deadlock is detected (i.e. the board state remains unchanged after an | |
iteration). | |
*/ | |
/* the board. */ | |
uint16_t g_board[9][9]; | |
/* set all 9 bits in this box. */ | |
void board_set_all(int row, int col) { | |
uint16_t all_nums = /* 0b0000000111111111 */ 0x1FF; | |
g_board[row][col] = all_nums; | |
} | |
/* convert a number to a bitvector. */ | |
uint16_t num_to_bitvec(int num) { | |
return 1 << (num-1); | |
} | |
/* add num to the box's bitvector. */ | |
void board_set_num(int row, int col, int num) { | |
g_board[row][col] |= num_to_bitvec(num); | |
} | |
/* count the number of bits which are set in the bitvector. */ | |
int count_set_bits(uint16_t box) { | |
int set_bits_count = 0; | |
int i = 0; | |
while (i < 16) { | |
if (box & /* 0b1 */ 0x1) { | |
set_bits_count++; | |
} | |
box = (box >> 1); | |
i++; | |
continue; | |
} | |
return set_bits_count; | |
} | |
/* is the box solved? */ | |
bool box_is_solved(int row, int col) { | |
uint16_t box = g_board[row][col]; | |
int bitcount = count_set_bits(box); | |
return bitcount == 1; | |
} | |
/* is the board solved? */ | |
bool board_is_solved() { | |
int row = 0; | |
int col = 0; | |
while (row < 9) { | |
while (col < 9) { | |
if (!box_is_solved(row, col)) { | |
return false; | |
} | |
col++; | |
continue; | |
} | |
row++; | |
continue; | |
} | |
return true; | |
} | |
/* return the character representation of the box. */ | |
char board_get_ch(int row, int col) { | |
if (!box_is_solved(row, col)) { | |
return '0'; | |
} | |
uint16_t box = g_board[row][col]; | |
uint8_t num = 1; | |
while (num < 10) { | |
if (box & /* 0b1 */ 0x1) { | |
return (char)(num + '0'); | |
} | |
box = (box >> 1); | |
num++; | |
continue; | |
} | |
assert(false); | |
exit(2); | |
} | |
/* print the board state. prints '0' for unsolved boxes. */ | |
void print_board() { | |
int row = 0; | |
while (row < 9) { | |
int col = 0; | |
while (col < 9) { | |
char ch = board_get_ch(row, col); | |
putchar(ch); | |
col++; | |
continue; | |
} | |
putchar('\n'); | |
row++; | |
continue; | |
} | |
} | |
/* return the total count of all remaining possibliities in every box. | |
a solved board has a possibilities count of 81. */ | |
int possibilities_count() { | |
int pcount = 0; | |
int row = 0; | |
while (row < 9) { | |
int col = 0; | |
while (col < 9) { | |
uint16_t box = g_board[row][col]; | |
int bitcount = count_set_bits(box); | |
pcount += bitcount; | |
col++; | |
continue; | |
} | |
row++; | |
continue; | |
} | |
return pcount; | |
} | |
/* read the board in from a file. */ | |
void load_board(char* fname) { | |
FILE* f = fopen(fname, "r"); | |
assert(f != NULL); | |
const int bufsize = 110; /* 9+2 per line * 9 lines + 1 null */ | |
char buf[bufsize]; | |
size_t count = fread(buf, 1, bufsize, f); | |
assert(count >= 90); | |
fclose(f); | |
int i = 0; | |
int row = 0; | |
while (row < 9) { | |
int col = 0; | |
while (col < 9) { | |
assert (i < bufsize); | |
char ch = buf[i]; | |
if (ch == '0') { | |
board_set_all(row, col); | |
} else { | |
int num = ch - '0'; | |
board_set_num(row, col, num); | |
} | |
i++; | |
col++; | |
continue; | |
} | |
/* skip over the newline. */ | |
char ch = buf[i]; | |
while (ch == '\n' || ch == '\r') { | |
i++; | |
ch = buf[i]; | |
continue; | |
} | |
row++; | |
continue; | |
} | |
} | |
/* return the combined bitmask of all solved boxes in the row. */ | |
uint16_t get_row_solved_bitmask(int row) { | |
uint16_t bitmask = 0; | |
int col = 0; | |
while (col < 9) { | |
if (box_is_solved(row, col)) { | |
bitmask |= g_board[row][col]; | |
} | |
col++; | |
continue; | |
} | |
return bitmask; | |
} | |
/* iterate over the rows, reducing possibilities. */ | |
void iterate_rows() { | |
int row = 0; | |
while (row < 9) { | |
uint16_t solved = get_row_solved_bitmask(row); | |
int col = 0; | |
while (col < 9) { | |
if (!box_is_solved(row, col)) { | |
/* subtract the solved bits from this box. */ | |
uint16_t box = g_board[row][col]; | |
box &= ~(solved); | |
g_board[row][col] = box; | |
} | |
col++; | |
continue; | |
} | |
row++; | |
continue; | |
} | |
} | |
/* return the combined bitmask of all solved boxes in the column. */ | |
uint16_t get_col_solved_bitmask(int col) { | |
uint16_t bitmask = 0; | |
int row = 0; | |
while (row < 9) { | |
if (box_is_solved(row, col)) { | |
bitmask |= g_board[row][col]; | |
} | |
row++; | |
continue; | |
} | |
return bitmask; | |
} | |
/* iterate over the columns, reducing possibilities. */ | |
void iterate_columns() { | |
int col = 0; | |
while (col < 9) { | |
uint16_t solved = get_col_solved_bitmask(col); | |
int row = 0; | |
while (row < 9) { | |
if (!box_is_solved(row, col)) { | |
/* subtract the solved bits from this box. */ | |
uint16_t box = g_board[row][col]; | |
box &= ~(solved); | |
g_board[row][col] = box; | |
} | |
row++; | |
continue; | |
} | |
col++; | |
continue; | |
} | |
} | |
/* return the combined bitmask of all solved boxes in the grid. */ | |
uint16_t get_grid_solved_bitmask(int grid) { | |
uint16_t bitmask = 0; | |
int first_row = (grid / 3) * 3; | |
int first_col = (grid % 3) * 3; | |
int row = first_row; | |
while (row < (first_row + 3)) { | |
int col = first_col; | |
while (col < (first_col + 3)) { | |
if (box_is_solved(row, col)) { | |
bitmask |= g_board[row][col]; | |
} | |
col++; | |
continue; | |
} | |
row++; | |
continue; | |
} | |
return bitmask; | |
} | |
/* iterate over the grids, reducing possibilities. */ | |
void iterate_grids() { | |
int grid = 0; | |
while (grid < 9) { | |
uint16_t solved = get_grid_solved_bitmask(grid); | |
int first_row = (grid / 3) * 3; | |
int first_col = (grid % 3) * 3; | |
int row = first_row; | |
while (row < (first_row + 3)) { | |
int col = first_col; | |
while (col < (first_col + 3)) { | |
if (!box_is_solved(row, col)) { | |
/* subtract the solved bits from this box. */ | |
uint16_t box = g_board[row][col]; | |
box &= ~(solved); | |
g_board[row][col] = box; | |
} | |
col++; | |
continue; | |
} | |
row++; | |
continue; | |
} | |
grid++; | |
continue; | |
} | |
} | |
/* iterate over the board, reducing possibilities. */ | |
void iterate_board() { | |
iterate_rows(); | |
iterate_columns(); | |
iterate_grids(); | |
} | |
int main(int argc, char* argv[]) { | |
load_board(argv[1]); | |
int before_pcount = possibilities_count(); | |
while (!board_is_solved()) { | |
iterate_board(); | |
int after_pcount = possibilities_count(); | |
if (after_pcount == before_pcount) { | |
fprintf(stderr, "Error: don't know how to solve this board.\n"); | |
exit(3); | |
} else { | |
before_pcount = after_pcount; | |
continue; | |
} | |
} | |
print_board(); | |
return EXIT_SUCCESS; | |
} |