Skip to content

Instantly share code, notes, and snippets.

@exactlyallan
Created June 29, 2021 22:54
Show Gist options
  • Save exactlyallan/4362b4e6b37cfaccc8ddefe7d5b5187d to your computer and use it in GitHub Desktop.
Save exactlyallan/4362b4e6b37cfaccc8ddefe7d5b5187d to your computer and use it in GitHub Desktop.
node-RAPIDS regex example
// Using https://github.com/rapidsai/node-rapids/
const cudf = require('@rapidsai/cudf');
// Weather-condition buckets to count. Each entry is an alternation of
// keywords; the benchmarks below use both the compiled RegExp (CPU) and its
// `.source` string (GPU).
const regexps = [
  'Cloud|Overcast',
  'Rain|T-Storm|Thunderstorm|Squalls|Drizzle',
  'Snow',
  'Fog',
  'Ice|Hail|Freezing|Sleet',
  'Dust|Smoke|Sand',
].map((pattern) => new RegExp(pattern));
// Blank line to separate the benchmark output from anything printed earlier.
console.log('');
// Read US_Accidents_Dec20.csv (expected next to this script) with cudf and
// keep only the `weather_condition` column; both benchmarks below run against
// this single string column.
const weather_condition_gpu = cudf.DataFrame.readCSV({
// NOTE(review): presumably marks row 0 as the header row — confirm against the
// cudf readCSV option docs.
header: 0,
sourceType: 'files',
sources: [`${__dirname}/US_Accidents_Dec20.csv`],
// Declared type for every column of the CSV.
// NOTE(review): the keys presumably also serve as the column names, in file
// order (the `.get('weather_condition')` below relies on that name) — confirm
// before reordering or renaming. The 'twighlight' spelling in the last two
// keys is runtime data and is left untouched here.
dataTypes: {
id: 'str', source: 'str', tmc: 'float64', severity: 'int32', start_time: 'str', end_time: 'str',
start_lat: 'float64', start_lng: 'float64', end_lat: 'float64', end_lng: 'float64',
distance: 'float64', description: 'str', number: 'int32', street: 'str', side: 'str',
city: 'str', county: 'str', state: 'str', zipcode: 'str', country: 'str', timezone: 'str', airport_code: 'str',
weather_timestamp: 'str', temperature: 'float64', wind_chill: 'float64', humidity: 'float64', pressure: 'float64',
visibility: 'float64', wind_direction: 'str', wind_speed: 'float64', precipitation: 'float64', weather_condition: 'str',
amenity: 'bool', bump: 'bool', crossing: 'bool', give_way: 'bool', junction: 'bool', no_exit: 'bool', railway: 'bool',
roundabout: 'bool', station: 'bool', stop: 'bool', traffic_calming: 'bool', traffic_signal: 'bool', turning_loop: 'bool',
sunrise_sunset: 'str', civil_twilight: 'str', nautical_twighlight: 'str', astronomical_twighlight: 'str'
},
}).get('weather_condition');
// GPU benchmark: for every pattern, run cudf's regex containment check over
// the whole column and sum the result to get the number of matching rows.
// Each pattern is timed individually; `GPU time` covers the whole loop,
// including the console output.
console.time(`GPU time`);
for (const { source } of regexps) {
  const timerLabel = `${source} time`;
  console.time(timerLabel);
  const matchMask = weather_condition_gpu.containsRe(source);
  const matchCount = matchMask.sum();
  console.timeEnd(timerLabel);
  console.log(`${source} matches: ${matchCount.toLocaleString()}`);
}
console.timeEnd(`GPU time`);
console.log('');
// Build a plain JavaScript array holding the same column for the CPU
// benchmark: cast the column to a categorical, copy its categories and codes
// into JS arrays, then look each row's code up in the category list.
const weather_condition_cpu = (() => {
  const asCategorical = weather_condition_gpu.cast(new cudf.Categorical(new cudf.Utf8String()));
  const categoryLookup = Array.from(asCategorical.categories);
  return Array.from(asCategorical.codes, (code) => categoryLookup[code]);
})();
// CPU benchmark: for every pattern, count how many rows of the host-side
// array match it. Each pattern is timed individually; `CPU time` covers the
// whole loop, including the console output.
console.time(`CPU time`);
regexps.forEach((regexp) => {
  const timerLabel = `${regexp.source} time`;
  console.time(timerLabel);
  let matches = 0;
  for (const weather_condition of weather_condition_cpu) {
    // Count one per matching row. `exec(...).length` would count
    // 1 + <number of capture groups> per hit, and allocates a match array for
    // every row; `test` gives a plain boolean. (Safe here because the patterns
    // carry no `g`/`y` flag, so `test` is stateless.)
    // Missing values are skipped explicitly instead of being coerced to the
    // strings "undefined"/"null" and matched against the pattern.
    if (weather_condition != null && regexp.test(weather_condition)) {
      matches += 1;
    }
  }
  console.timeEnd(timerLabel);
  console.log(`${regexp.source} matches: ${matches.toLocaleString()}`);
});
console.timeEnd(`CPU time`);
console.log('');
/* OUTPUT:
---------------------------
// 1.6GB .CSV
// GPU: Titan RTX
Cloud|Overcast time: 26.819ms
Cloud|Overcast matches: 1,896,354
Rain|T-Storm|Thunderstorm|Squalls|Drizzle time: 63.813ms
Rain|T-Storm|Thunderstorm|Squalls|Drizzle matches: 326,441
Snow time: 6.396ms
Snow matches: 68,101
Fog time: 6.997ms
Fog matches: 52,063
Ice|Hail|Freezing|Sleet time: 44.031ms
Ice|Hail|Freezing|Sleet matches: 4,698
Dust|Smoke|Sand time: 29.932ms
Dust|Smoke|Sand matches: 8,846
GPU time: 190.457ms
// CPU: AMD Ryzen Threadripper 1900X 8-Core (3.8GHz)
Cloud|Overcast time: 244.493ms
Cloud|Overcast matches: 1,896,354
Rain|T-Storm|Thunderstorm|Squalls|Drizzle time: 192.591ms
Rain|T-Storm|Thunderstorm|Squalls|Drizzle matches: 326,441
Snow time: 206.071ms
Snow matches: 68,101
Fog time: 204.61ms
Fog matches: 52,063
Ice|Hail|Freezing|Sleet time: 214.325ms
Ice|Hail|Freezing|Sleet matches: 4,698
Dust|Smoke|Sand time: 164.633ms
Dust|Smoke|Sand matches: 8,846
CPU time: 1.230s
---------------------------
// GPU is 6.45x faster than CPU
*/
@exactlyallan
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment