StringSplits is a helper class, thought up by Marc Gravell, that we use in the Stack Overflow/Stack Exchange code base to save on string allocations and relieve pressure on the gen 0 garbage collector.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Shared, pre-allocated separator arrays for <c>string.Split</c>.
/// Using a single instance of these arrays prevents an allocation of a new array on every call to something like
/// <c>string.Split(';')</c>, which really binds to <c>string.Split(params char[] separator)</c> and therefore
/// allocates an additional array behind the scenes.
/// </summary>
/// <remarks>
/// For most applications this is a micro-optimization that doesn't matter. However, for very high traffic code paths
/// this can generate a significant amount of items that the gen 0 garbage collector needs to collect, causing micro-stalls
/// in the app domain when it runs, or runs more often.
/// The arrays are shared by every caller and array elements are not protected by <c>readonly</c>;
/// never write to them.
/// </remarks>
public static class StringSplits
{
    public static readonly char[] Space = { ' ' },
                                  Comma = { ',' },
                                  Period = { '.' },
                                  Minus = { '-' },
                                  Plus = { '+' },
                                  Asterisk = { '*' },
                                  Percent = { '%' },
                                  Ampersand = { '&' },
                                  Equal = { '=' },
                                  Underscore = { '_' },
                                  NewLine = { '\n' },
                                  SemiColon = { ';' },
                                  Colon = { ':' },
                                  VerticalBar = { '|' },
                                  ForwardSlash = { '/' },
                                  DoubleQuote = { '"' },
                                  NewLine_CarriageReturn = { '\n', '\r' },
                                  Comma_Space = { ',', ' ' },
                                  Comma_SemiColon = { ',', ';' },
                                  Comma_SemiColon_Space = { ',', ';', ' ' },
                                  BackSlash_Slash_Period = { '\\', '/', '.' },
                                  DoubleRightArrow = { '»' };

    public static readonly string[] CarriageReturnNewLineString = { "\r\n" };

    /// <summary>
    /// Returns a list of ints (usually representing models' ids) from a semi-colon delimited string, e.g. "15145651;15145707;15145703".
    /// </summary>
    /// <param name="semiColonDelimitedIds">The delimited ids; may be null or empty.</param>
    /// <returns>
    /// The ids that parsed successfully, in their original order. Segments that are not valid integers are skipped.
    /// An empty list (never null) is returned for null or empty input.
    /// </returns>
    public static List<int> SplitVectorizedIds(this string semiColonDelimitedIds)
    {
        var result = new List<int>();
        if (string.IsNullOrEmpty(semiColonDelimitedIds))
        {
            return result;
        }

        foreach (var idStr in semiColonDelimitedIds.Split(SemiColon))
        {
            int id;
            // Ids are machine-readable data, so parse with the invariant culture rather than
            // whatever culture the current thread happens to have.
            if (int.TryParse(idStr,
                             System.Globalization.NumberStyles.Integer,
                             System.Globalization.CultureInfo.InvariantCulture,
                             out id))
            {
                result.Add(id);
            }
        }
        return result;
    }

    /// <summary>
    /// For seeing if a token is in a delimited string without splitting it out into individual string components first.
    /// Calling .Split(delimiter) causes an allocation of additional strings which are also not interned and use duplicate
    /// memory to store the same thing. If you're splitting a setting or something else often, splitting would put a lot
    /// of copies of the same string that then need to be garbage collected.
    /// This helps on 3 points: lookup faster, allocate less, cleanup less.
    /// </summary>
    /// <param name="value">The delimited string to search; null or empty yields false.</param>
    /// <param name="token">The token to look for; null or empty yields false.</param>
    /// <param name="delimiter">The character that separates tokens in <paramref name="value"/>.</param>
    /// <returns>
    /// True when <paramref name="token"/> occurs in <paramref name="value"/> as a whole element, i.e. bounded on each
    /// side by either <paramref name="delimiter"/> or the start/end of the string. The comparison is ordinal
    /// and case-sensitive.
    /// </returns>
    public static bool ContainsToken(string value, string token, char delimiter = ';')
    {
        if (string.IsNullOrEmpty(token)) return false;
        if (string.IsNullOrEmpty(value)) return false;

        int tokenLength = token.Length,
            endIndex = value.Length - tokenLength, // last index at which the token could still fit
            idx = -1;

        // Ordinal search: a culture-sensitive match may span a different number of chars than
        // token.Length, which would make the boundary checks below look at the wrong positions.
        while ((idx = value.IndexOf(token, idx + 1, System.StringComparison.Ordinal)) >= 0)
        {
            // A hit only counts when it is delimited (or at the string edge) on both sides.
            if ((idx == 0 || value[idx - 1] == delimiter)
                && (idx == endIndex || value[idx + tokenLength] == delimiter))
            {
                return true;
            }
        }
        return false;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment