Last active
October 10, 2022 03:06
-
-
Save lsauer/2757250 to your computer and use it in GitHub Desktop.
JavaScript: within a string, count the number of occurrences of a character / character counting and string position
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//www.lsauer.com 2012 | |
//Answer to: | |
//http://stackoverflow.com/questions/881085/count-the-number-of-occurances-of-a-character-in-a-string-in-javascript/10671743#10671743 | |
//There are at least ten ways. The best option, which should also be the fastest - owing to the native RegEx engine - is placed at the top. //jsperf.com is currently down, otherwise I would provide you with performance statistics.
#1. | |
("this is foo bar".match(/o/g)||[]).length | |
//>2 | |
#2. | |
"this is foo bar".split("o").length-1 | |
//>2 | |
//split is not recommended. Resource hungry: it allocates a new array plus a new substring for every piece between matches. Don't try that for a >100MB file via FileReader.
//You can actually easily observe the EXACT resource usage using **Chrome's profiler** option. | |
#3. | |
/**
 * Counts how often `stringsearch` occurs in `str` by hopping from hit to hit
 * with String.prototype.indexOf. Overlapping hits are counted (e.g. "aa" is
 * found twice in "aaa") because each scan resumes one position after the
 * previous hit.
 *
 * @param {string} str - text to scan
 * @param {string} stringsearch - character or substring to look for
 * @returns {number} number of hits; 0 for an empty search string
 */
function countByIndexOf(str, stringsearch) {
  // indexOf("") never returns -1, so an empty needle would loop forever.
  if (stringsearch === "") {
    return 0;
  }
  let count = 0;
  for (
    let index = str.indexOf(stringsearch);
    index !== -1;
    index = str.indexOf(stringsearch, index + 1)
  ) {
    count++;
  }
  return count;
}

var stringsearch = "o";
var str = "this is foo bar";
var count = countByIndexOf(str, stringsearch);
//>count:2
#4. | |
//searching for a single character
var stringsearch = "o";
var str = "this is foo bar";
// `count` is declared on its own line: chaining it as `var i = count = 0`
// would only declare `i` and leave `count` an implicit global, which is a
// ReferenceError in strict mode and in ES modules.
var count = 0;
for (let i = 0; i < str.length; i++) {
  // Add one for every position whose character equals the searched one.
  if (stringsearch === str[i]) {
    count++;
  }
}
//>count:2
#5. | |
//element mapping and filtering; not recommended due to its overall resource pre-allocation vs. Pythonian 'generators' | |
//provides the position within the string | |
/**
 * Lists every index of `str` (in UTF-16 code units, as produced by
 * split('')) at which the single character `schar` occurs.
 *
 * @param {string} str - text to scan
 * @param {string} schar - character to look for
 * @returns {number[]} ascending list of positions; empty when there is no hit
 */
function findPositions(str, schar) {
  return str.split('')
    // Non-matches become -1 instead of undefined so that a real hit at
    // index 0 (which is falsy) is not thrown away by the filter step.
    .map(function (e, i) { return e === schar ? i : -1; })
    .filter(function (i) { return i !== -1; });
}

var str = "this is foo bar";
var positions = findPositions(str, 'o');
//>[9, 10]
positions.length;
//>2
#6 | |
//'deleting' the character out of the string and measuring the distance in length | |
var str = "this is foo bar"; | |
str.length - str.replace(/o/g,'').length | |
//>2 | |
#7 | |
//based on typed arrays; str2buffer is taken from 'is-lib'; See: https://gist.github.com/lsauer | |
/**
 * Converts an ASCII string to a typed-array buffer holding one char code per
 * element. Char codes above 255 do not fit into a Uint8Array element and are
 * truncated modulo 256, so only single-byte input round-trips.
 *
 * @param {string} s - string to convert
 * @returns {Uint8Array} array of length s.length with the char codes of `s`
 */
var str2buffer = function (s) {
  var bu = new ArrayBuffer(s.length);
  var aUint8 = new Uint8Array(bu);
  for (let i = 0; i < bu.byteLength; i++) {
    aUint8[i] = s.charCodeAt(i);
  }
  return aUint8;
};
var bstr = str2buffer("this is foo bar");
var schar = 'o'.charCodeAt(0);
var cnt = 0;
// Compare the searched char code against every element of the buffer.
for (let i = 0; i < bstr.byteLength; i++) {
  if (bstr[i] === schar) {
    cnt++;
  }
}
//>cnt: 2
#8 | |
//based on untyped Arrays. Is expected to be slower. Analogous to #7 | |
// Map every character to its char code, giving a plain (untyped) array.
var ubstr = "this is foo bar".split('').map(function (e) { return e.charCodeAt(0); });
//>[116, 104, 105, 115, 32, 105, 115, 32, 102, 111, 111, 32, 98, 97, 114]
var schar = 'o'.charCodeAt(0);
var cnt = 0;
// Compare the searched char code against every element of the array.
for (let i = 0; i < ubstr.length; i++) {
  if (ubstr[i] === schar) {
    cnt++;
  }
}
//>cnt: 2
#9 | |
//using reduce. Note: Element map functions are powerful but slow as they involve their own heap/stack allocation | |
//see: http://stackoverflow.com/questions/10293378/what-is-the-most-efficient-way-of-merging-1-2-and-7-8-into-1-7-2-8/17910641#17910641 | |
/**
 * Counts the characters of `str` that equal `schar` using
 * Array.prototype.reduce.
 *
 * @param {string} str - text to scan
 * @param {string} schar - single character to look for
 * @returns {number} number of occurrences; 0 when there is none
 */
function countByReduce(str, schar) {
  // Seeding the accumulator with 0 keeps it numeric from the first step on
  // (the first character is never mistaken for a running total) and makes
  // the empty string yield 0 instead of throwing a TypeError.
  return str.split('').reduce(function (total, c) {
    return c === schar ? total + 1 : total;
  }, 0);
}

var str = "this is foo bar",
    schar = 'o';
countByReduce(str, schar);
//>2
#10. dictionary character histogram | |
var str = "this is foo bar",
    schar = 'o',
    hist = {};
// Plain index loop with a declared counter: it visits exactly the positions
// of the string and does not create a global loop variable.
for (let si = 0; si < str.length; si++) {
  const ch = str[si];
  // First sighting starts at 1, every further one adds 1.
  hist[ch] = (hist[ch] || 0) + 1;
}
//>hist[schar]
//>2
//Changelog > 11/2013: | |
// | |
// 24/11/2013 #3 bug fixed in initial index position; pointed out by Augustus@Stackoverflow |
Just another way:
const str = 'this is foo bar';
// Walk the characters of the string and add one for each 'o' encountered.
const count = Array.from(str).reduce(
  (total, ch) => (ch === 'o' ? total + 1 : total),
  0,
);
console.log(count);
Can you explain the for loop in number 4?
count+=+(stringsearch===str[i++])
Here's another way:
/**
 * Counts the non-overlapping occurrences of `searchFor` in `text`.
 * The comparison is case-sensitive; lower-case both arguments beforehand for
 * a case-insensitive count.
 *
 * @param {string} text - text to scan
 * @param {string} searchFor - substring to look for
 * @returns {number} number of occurrences; 0 for an empty search string
 */
function countSubstring(text, searchFor) {
  // An empty needle is found at every position and would never end the scan.
  if (searchFor === "") {
    return 0;
  }
  let counter = 0;
  let from = text.indexOf(searchFor);
  while (from !== -1) {
    counter++;
    // Resume behind the whole match so that the full substring is compared
    // each time, not just its first character.
    from = text.indexOf(searchFor, from + searchFor.length);
  }
  return counter;
}

var text = "The quick brown fox jumps over the lazy dog";
text = text.toLowerCase();
var searchFor = "the";
var counter = countSubstring(text, searchFor);
console.log(counter);
which one is faster?
var text = "The quick brown fox jumps over the lazy dog";
// match() returns null when nothing matches, so fall back to an empty array
// before reading .length.
var theCount = (text.match(/the/gi) || []).length;
// NOTE(review): the separator between the two parts was a raw line break
// inside the string literal (a syntax error); "<br>" is assumed here because
// the output goes to an HTML document - confirm.
document.write("Text: " + text + "<br>" + "There are " + theCount + " of word the");
Intuitively, it seems like this should be fastest:
// Remove every 'o' and measure how much shorter the string became: the
// difference in length is the number of occurrences.
const s = "this is foo bar";
const stripped = s.replaceAll('o', '');
const oCount = s.length - stripped.length;
If there are only two kinds of character in the string, then this is faster still:
// With only two distinct characters in the string, deleting one kind leaves
// exactly the characters of the other kind behind.
const s = "001101001";
const onesOnly = s.replaceAll('0', '');
const oneCount = onesOnly.length;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Good code.
How would you deal with finding the maximum occurrence of substrings in a larger string without looking for a specific string?
EX: "can can ran tan van"
The output being "an".