Skip to content

Instantly share code, notes, and snippets.

@syusui-s
Last active May 13, 2023 09:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save syusui-s/3fd2352e1773ced36c0882194d6509fd to your computer and use it in GitHub Desktop.
Save syusui-s/3fd2352e1773ced36c0882194d6509fd to your computer and use it in GitHub Desktop.
node_modules/
package-lock.json
import XXH from 'xxhashjs';
const seeds = new Array(1).fill(0).map(() => Math.floor(Math.random() * 1.0e8));
const bloomCount = (n: number, data: string[]): bigint[] => {
if (n < data.length) throw new Error();
const bitsets = new Array(seeds.length).fill(0n);
// console.log(data.length)
for (let bitsetIndex = 0; bitsetIndex < bitsets.length; bitsetIndex++) {
for (let dataIndex = 0; dataIndex < data.length; dataIndex++) {
const bitset = bitsets[bitsetIndex];
const xxhash = XXH.h32(data[dataIndex], seeds[0]);
const rem = BigInt(xxhash.toNumber() % n);
let j = rem;
// 既にビットセットされていれば次の空きビットを探す
while ((bitset & (1n << j)) !== 0n) {
j = (j + 1n) % BigInt(n);
// 一周したら終了する
if (j === rem) break;
// console.log(rem, j)
}
// console.log(dataIndex, rem, j, data[dataIndex])
bitsets[bitsetIndex] = bitset | 1n << j;
}
}
return bitsets;
};
const mergeBloomCount = (bitsetsArray: bigint[][]) => {
if (!bitsetsArray.every((bitsets) => bitsetsArray[0].length === bitsets.length)) throw new Error();
const result = new Array(bitsetsArray[0].length).fill(0n);
for (let i = 0; i < bitsetsArray.length; i++) {
for (let j = 0; j < result.length; j++) {
result[j] |= bitsetsArray[i][j];
}
}
return result;
};
const table: number[] = [];
for (let i = 0; i <= 0xff; i += 1) {
let count = 0;
for (let j = i; j > 0; j = j >> 1) {
if ((j & 1) == 1) count += 1;
}
table[i] = count;
}
const countBloomCount = (bitsets: bigint[]) => {
const counts = bitsets.map((bitset) => {
let count = 0;
for (let i = bitset; i > 0n; i = i >> 8n) {
count += table[Number(i & 0xffn)];
}
return count;
})
return Math.max(...counts);
};
const split = <T>(n: number, data: T[]) => {
const result = [];
let unit = Math.floor(data.length / n);
for (let i = 0; i < data.length; i += unit) {
result.push(data.slice(i, i + unit));
}
return result;
};
const n = 1000;
const len = 100;
const counts = [];
for (let i = 0; i < 1000; i++) {
const data =
new Array(len)
.fill(null)
.map(() => Math.floor(Math.random() * 1.0e16).toString());
const result = bloomCount(n, data);
const splitted = split(6, data);
const results = splitted.map((d) => {
return bloomCount(n, d);
});
const merged = mergeBloomCount(results);
// console.time('countBloomCount');
const count = countBloomCount(merged);
// console.timeEnd('countBloomCount');
counts.push(count);
}
const stat = (counts: number[]) => {
let min = Infinity;
let max = 0;
let sum = 0;
counts.forEach((e) => {
min = Math.min(min, e);
max = Math.max(max, e);
sum += e;
});
const avg = sum / counts.length;
return { min, max, sum, avg };
}
console.log(`len = ${len}, n = ${n}`);
console.log(stat(counts));
console.log(counts.map((e) => e));
// カッコウハッシングみたいな感じにして
// 何回か問い合わせていくと精度があがるみたいな仕組みは作れそう?
// ただ、サーバは相応に負荷を受けることになる
{
"name": "bloomcount",
"version": "1.0.0",
"description": "",
"main": "bloomcount.js",
"scripts": {
"run": "deno run bloomcount.ts"
},
"author": "",
"license": "ISC",
"devDependencies": {
"@types/xxhashjs": "^0.2.2",
"typescript": "^5.0.4"
},
"dependencies": {
"xxhashjs": "^0.2.2"
}
}
{
"compilerOptions": {
/* Visit https://aka.ms/tsconfig to read more about this file */
/* Projects */
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
/* Language and Environment */
"target": "esnext", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
// "jsx": "preserve", /* Specify what JSX code is generated. */
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
/* Modules */
"module": "commonjs", /* Specify what module code is generated. */
// "rootDir": "./", /* Specify the root folder within your source files. */
// "moduleResolution": "node10", /* Specify how TypeScript looks up a file from a given module specifier. */
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
// "resolveJsonModule": true, /* Enable importing .json files. */
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
/* JavaScript Support */
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
/* Emit */
// "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
// "outDir": "./", /* Specify an output folder for all emitted files. */
// "removeComments": true, /* Disable emitting comments. */
// "noEmit": true, /* Disable emitting files from a compilation. */
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
// "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
// "newLine": "crlf", /* Set the newline character for emitting files. */
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
// "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
/* Interop Constraints */
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
/* Type Checking */
"strict": true, /* Enable all strict type-checking options. */
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
/* Completeness */
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
"skipLibCheck": true /* Skip type checking all .d.ts files. */
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment