Last active
August 29, 2015 14:02
-
-
Save kyrathasoft/5933ab0b9f125751bbe9 to your computer and use it in GitHub Desktop.
Methods related to string manipulation in C#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.IO; | |
using System.Text.RegularExpressions; | |
using System.Text; | |
using com.williambryanmiller.testing; | |
using com.williambryanmiller.files; | |
namespace com.williambryanmiller.strings { | |
class MyStrings { | |
/* Note: the code from this class file is also available on GitHub Gist @ | |
https://gist.github.com/kyrathasoft/5933ab0b9f125751bbe9 */ | |
/// <summary>
/// Appends a period to the text unless its last non-whitespace character
/// is already '.', '?' or '!'.
/// </summary>
/// <param name="p">Text to inspect.</param>
/// <returns>
/// The text unchanged, or the text with "." appended. Trailing whitespace is
/// left in place, so the period is added after it.
/// </returns>
public static string addEndingPeriodIfNoEndingPunctuation(string p) {
    // Last character once trailing whitespace is ignored (empty when nothing is left).
    string withoutTrailing = p.TrimEnd();
    string lastChar = withoutTrailing.Length > 0
        ? withoutTrailing.Substring(withoutTrailing.Length - 1, 1)
        : string.Empty;

    bool alreadyPunctuated = lastChar == "." || lastChar == "?" || lastChar == "!";
    return alreadyPunctuated ? p : p + ".";
}
/// <summary>
/// Trims the sentence and upper-cases its first character.
/// </summary>
/// <param name="theSentence">Sentence to process; may be null or empty.</param>
/// <returns>
/// The trimmed sentence with its first character upper-cased, or an empty
/// string when the input is null, empty or whitespace only.
/// </returns>
public static string capitalizeFirstWordInSentence(string theSentence) {
    if (theSentence == null) {
        return string.Empty;
    }
    string trimmed = theSentence.Trim();
    if (trimmed.Length == 0) {
        return string.Empty;
    }
    // The remainder must be measured on the trimmed text; using the untrimmed
    // length overruns the string whenever whitespace was removed.
    return trimmed.Substring(0, 1).ToUpper() + trimmed.Substring(1);
}
/// <summary>
/// Trims the text and upper-cases the first character of the text and of each
/// sentence that follows '.', '?' or '!'.
/// </summary>
/// <remarks>
/// A sentence start is recognised in two shapes only: the mark followed by one
/// space and then the character, or the mark followed by CR LF and then the
/// character. Sentences preceded by a blank line are not handled.
/// </remarks>
/// <param name="text">Text to process.</param>
/// <returns>The trimmed text with sentence starts upper-cased; empty when nothing remains after trimming.</returns>
public static string capitalizeFirstLetterOfEachSentence(string text) {
    char[] chars = text.Trim().ToCharArray();
    if (chars.Length == 0) {
        // Nothing to capitalise; indexing chars[0] would be out of range.
        return string.Empty;
    }

    // Same casing rule as the sentence starts below, so non-ASCII letters are handled too.
    chars[0] = char.ToUpper(chars[0]);

    int last = chars.Length - 1;
    for (int i = 0; i <= last; i++) {
        char current = chars[i];
        if (current != '.' && current != '?' && current != '!') {
            continue;
        }
        // "<mark><space><char>" - skipped when the space is directly followed by a carriage return.
        if (i + 2 <= last && chars[i + 1] == ' ' && chars[i + 2] != '\r') {
            chars[i + 2] = char.ToUpper(chars[i + 2]);
        }
        // "<mark>CR LF<char>" - sentence continues on the next line.
        if (i + 3 <= last && chars[i + 1] == '\r' && chars[i + 2] == '\n') {
            chars[i + 3] = char.ToUpper(chars[i + 3]);
        }
    }
    return new string(chars);
}
/// <summary>
/// Lower-cases the text, then upper-cases the first character of every
/// space-separated word and joins the words with single spaces.
/// </summary>
/// <param name="the_string">Text to process.</param>
/// <returns>The re-cased text, or an empty string when the input is whitespace only.</returns>
public static string capitalizeWordsInString(string the_string) {
    if (the_string.Trim().Length == 0) {
        return string.Empty;
    }

    StringBuilder joined = new StringBuilder();
    foreach (string piece in the_string.ToLower().Split(' ')) {
        // Pieces are split on ' ' only; other surrounding whitespace is trimmed off each piece.
        string word = piece.Trim();
        if (word.Length == 0) {
            continue;
        }
        if (joined.Length > 0) {
            joined.Append(' ');
        }
        joined.Append(word.Substring(0, 1).ToUpper());
        joined.Append(word.Substring(1, word.Length - 1));
    }
    return joined.ToString();
}
/// <summary>
/// Joins the array elements with single spaces, except that the final element
/// is attached directly to the one before it (no separating space).
/// </summary>
/// <param name="myArray">Tokens to join; the last one is typically the ending period.</param>
/// <returns>
/// The joined text. An empty or null array yields an empty string and a
/// single-element array yields that element.
/// </returns>
public static string connectLastWordAndEndingPeriod(string[] myArray) {
    if (myArray == null || myArray.Length == 0) {
        return String.Empty;
    }
    if (myArray.Length == 1) {
        return myArray[0] ?? String.Empty;
    }

    int last = myArray.Length - 1;
    StringBuilder result = new StringBuilder();
    // Every element before the final pair is followed by a space. The bound is
    // last - 1 so that the element just before the pair is not skipped.
    for (int i = 0; i < last - 1; i++) {
        result.Append(myArray[i]).Append(' ');
    }
    result.Append(myArray[last - 1]);
    result.Append(myArray[last]);
    return result.ToString();
}
/// <summary>
/// Collapses every run of whitespace characters into a single space.
/// </summary>
/// <param name="value">Text to normalise.</param>
/// <returns>The text with each whitespace run replaced by " ".</returns>
public static string convertWhitespacesToSingleSpace(string value) {
    const string whitespaceRun = @"\s+";
    return Regex.Replace(value, whitespaceRun, " ");
}
/// <summary>
/// Returns the last character of the text once trailing whitespace is ignored.
/// </summary>
/// <param name="p">Text to inspect.</param>
/// <returns>A one-character string, or an empty string when the text is empty or whitespace only.</returns>
public static string getFinalNonSpaceCharacter(string p) {
    string withoutTrailing = p.TrimEnd();
    if (withoutTrailing.Length == 0) {
        return string.Empty;
    }
    return withoutTrailing[withoutTrailing.Length - 1].ToString();
}
/// <summary>
/// Looks for a period, then a space, then a character accepted by
/// MyTests.isNumeric.
/// </summary>
/// <param name="p">Text to search.</param>
/// <returns>
/// The index of the space in the first such occurrence, or -1 when there is none.
/// </returns>
public static int hasPeriodFollowedBySpaceFollowedByNumeric(string p) {
    int index = p.IndexOf(". ", StringComparison.Ordinal);
    // Examine every ". " in turn; an early non-numeric occurrence must not hide a later match.
    while (index >= 0) {
        int candidate = index + 2;
        if (candidate <= p.Length - 1 && MyTests.isNumeric(p[candidate])) {
            return index + 1;
        }
        index = p.IndexOf(". ", index + 1, StringComparison.Ordinal);
    }
    return -1;
}
/// <summary>
/// Counts the characters of the text for which MyTests.isAlphanumeric returns true.
/// </summary>
/// <param name="p">Text to scan.</param>
/// <returns>The number of accepted characters.</returns>
public static int numAlphanumerics(string p) {
    int count = 0;
    for (int i = 0; i < p.Length; i++) {
        if (MyTests.isAlphanumeric(p[i])) {
            count++;
        }
    }
    return count;
}
/// <summary>
/// Counts the lines (separated by '\n') that are empty or whitespace only.
/// </summary>
/// <param name="p">Text to scan.</param>
/// <returns>The number of blank lines; a trailing newline contributes one empty final line.</returns>
public static int numBlankLinesInString(string p) {
    int blankCount = 0;
    string[] lines = p.Split('\n');
    for (int i = 0; i < lines.Length; i++) {
        if (lines[i].Trim().Length > 0) {
            continue;
        }
        blankCount++;
    }
    return blankCount;
}
/// <summary>
/// Counts the lines of the text, where lines are separated by '\n'.
/// </summary>
/// <param name="p">Text to scan.</param>
/// <returns>One more than the number of '\n' characters, so an empty string counts as one line.</returns>
public static int numLinesInString(string p) {
    int lineCount = 1;
    foreach (char c in p) {
        if (c == '\n') {
            lineCount++;
        }
    }
    return lineCount;
}
/// <summary>
/// Counts the characters of the text that are not whitespace (per Char.IsWhiteSpace).
/// </summary>
/// <param name="text">Text to scan.</param>
/// <returns>The number of non-whitespace characters.</returns>
public static int numNonSpaceCharacters(string text) {
    int total = 0;
    foreach (char c in text) {
        total += Char.IsWhiteSpace(c) ? 0 : 1;
    }
    return total;
}
/// <summary>
/// Counts the whitespace characters of the text (per Char.IsWhiteSpace, so
/// tabs and line breaks are included, not just ' ').
/// </summary>
/// <param name="text">Text to scan.</param>
/// <returns>The number of whitespace characters.</returns>
public static int numSpaceChars(string text) {
    int total = 0;
    foreach (char c in text) {
        if (Char.IsWhiteSpace(c)) {
            total++;
        }
    }
    return total;
}
/// <summary>
/// Counts the whitespace-separated words in the text.
/// </summary>
/// <param name="p">Text to scan.</param>
/// <returns>
/// The number of non-empty tokens. Repeated, leading and trailing whitespace
/// does not produce extra words, and tabs and line breaks separate words too.
/// </returns>
public static int numWords(string p) {
    if (p.Trim().Length == 0) {
        return 0;
    }
    // A null separator array makes Split treat every whitespace character as a delimiter.
    string[] words = p.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
    return words.Length;
}
/// <summary>
/// Counts the words whose length, after stripping trailing punctuation, is at
/// least the given minimum.
/// </summary>
/// <remarks>
/// For each word at most one trailing '.', then ',', then ':', then ';' is
/// removed, in that order. The input array is not modified.
/// </remarks>
/// <param name="words">Words to examine.</param>
/// <param name="minLength">Minimum length a stripped word needs to be counted.</param>
/// <returns>The number of qualifying words.</returns>
public static int numWordsOfMinimumLength(string[] words, int minLength) {
    string[] trailingMarks = { ".", ",", ":", ";" };
    int result = 0;
    foreach (string entry in words) {
        // Work on a local copy so the caller's array keeps its punctuation.
        string word = entry;
        foreach (string mark in trailingMarks) {
            if (word.EndsWith(mark, StringComparison.Ordinal)) {
                word = word.Substring(0, word.Length - 1);
            }
        }
        if (word.Length >= minLength) {
            result++;
        }
    }
    return result;
}
/// <summary>
/// Formats a number with its English ordinal suffix, e.g. 1 gives "1st",
/// 2 gives "2nd", 7 gives "7th" and 11 gives "11th".
/// </summary>
/// <param name="_num">Number to format.</param>
/// <returns>The number followed by "st", "nd", "rd" or "th". Negative numbers always get "th".</returns>
public static string nthToStringWithOrdinalSuffix(int _num) {
    string suffix = "th";
    if (_num >= 0) {
        int lastTwoDigits = _num % 100;
        // 11, 12 and 13 (also 111, 212, ...) take "th" despite ending in 1, 2 or 3.
        bool isTeen = lastTwoDigits >= 11 && lastTwoDigits <= 13;
        if (!isTeen) {
            switch (_num % 10) {
                case 1:
                    suffix = "st";
                    break;
                case 2:
                    suffix = "nd";
                    break;
                case 3:
                    suffix = "rd";
                    break;
                default:
                    break;
            }
        }
    }
    return _num.ToString() + suffix;
}
/// <summary>
/// Splits the text into words: whitespace runs are first collapsed to single
/// spaces, then the text is split on ' ' with empty entries dropped.
/// </summary>
/// <param name="p">Text to split.</param>
/// <returns>The non-empty words, in order of appearance.</returns>
public static string[] parseWordsViaSinglespaceDelimeter(string p) {
    string collapsed = Regex.Replace(p, @"\s+", " ");
    char[] separators = { ' ' };
    return collapsed.Split(separators, StringSplitOptions.RemoveEmptyEntries);
}
/// <summary>
/// Counts the periods that are immediately followed by a character other than ' '.
/// </summary>
/// <param name="p">Text to scan.</param>
/// <returns>
/// The number of such periods. A period that is the very last character is
/// never counted, because nothing follows it.
/// </returns>
public static int periodFollowedByNonSpace(string p) {
    int hits = 0;
    int position = p.IndexOf('.');
    // Hop from period to period; stop once a period has no successor to inspect.
    while (position >= 0 && position < p.Length - 1) {
        if (p[position + 1] != ' ') {
            hits++;
        }
        position = p.IndexOf('.', position + 1);
    }
    return hits;
}
/// <summary>
/// Returns the text without its last line.
/// </summary>
/// <remarks>
/// Trailing whitespace (including trailing line breaks) is ignored when
/// locating the last line, so "a\nb\n" yields "a". The line break that
/// preceded the removed line is dropped as well, whether it was LF or CR LF.
/// </remarks>
/// <param name="p">Text to shorten.</param>
/// <returns>Everything before the last line, or an empty string when the text has a single line.</returns>
public static string removeLastLineOfText(string p) {
    string trimmed = p.TrimEnd();
    int lastNewline = trimmed.LastIndexOf('\n');
    if (lastNewline == -1) {
        return string.Empty;
    }
    // Strip the '\r' half of a CR LF pair so no dangling carriage return remains.
    return trimmed.Substring(0, lastNewline).TrimEnd('\r');
}
/// <summary>
/// Collapses whitespace runs to single spaces and trims every line.
/// </summary>
/// <remarks>
/// The array passed in is modified in place and that same array is returned.
/// </remarks>
/// <param name="lines_of_text">Lines to normalise.</param>
/// <returns>The input array, with each element normalised.</returns>
public static string[] removeExtraWhitespaceWithinEachLine(string[] lines_of_text) {
    int index = 0;
    while (index < lines_of_text.Length) {
        string collapsed = Regex.Replace(lines_of_text[index], @"\s+", " ");
        lines_of_text[index] = collapsed.Trim();
        index++;
    }
    return lines_of_text;
}
/// <summary>
/// Strips punctuation from the ends of the text, in at most three passes.
/// </summary>
/// <remarks>
/// Each pass first removes one trailing character if it is one of
/// ! . , ? ; : " ) and then removes one leading character if it is " or (.
/// Anything beyond three characters per side is left in place.
/// </remarks>
/// <param name="p">Text to strip.</param>
/// <returns>The stripped text.</returns>
public static string removePrecedingOrEndingPunctuationInString(string p) {
    string[] trailingMarks = { "!", ".", ",", "?", ";", ":", "\"", ")" };
    string[] leadingMarks = { "\"", "(" };
    string current = p;

    for (int pass = 0; pass < 3; pass++) {
        bool hasTrailingMark = false;
        foreach (string mark in trailingMarks) {
            if (current.EndsWith(mark)) {
                hasTrailingMark = true;
                break;
            }
        }
        if (hasTrailingMark) {
            current = current.Substring(0, current.Length - 1);
        }

        bool hasLeadingMark = false;
        foreach (string mark in leadingMarks) {
            if (current.StartsWith(mark)) {
                hasLeadingMark = true;
                break;
            }
        }
        if (hasLeadingMark) {
            current = current.Substring(1, current.Length - 1);
        }
    }
    return current;
}
/// <summary>
/// Replaces every character that MyTests.isAlphanumeric rejects with the
/// given replacement character.
/// </summary>
/// <param name="p">Text to process.</param>
/// <param name="replacement">Character substituted for each rejected character.</param>
/// <returns>A string of the same length as the input.</returns>
public static string replaceNonAlphanumericsWith(string p, char replacement) {
    char[] buffer = new char[p.Length];
    for (int i = 0; i < p.Length; i++) {
        buffer[i] = MyTests.isAlphanumeric(p[i]) ? p[i] : replacement;
    }
    return new string(buffer);
}
/// <summary>
/// Returns the last line of the trimmed text, where lines are separated by '\n'.
/// </summary>
/// <param name="p">Text to inspect.</param>
/// <returns>The text after the final '\n' of the trimmed input, or the whole trimmed input when it has no '\n'.</returns>
public static string returnLastLineOfText(string p) {
    string trimmed = p.Trim();
    int lastNewline = trimmed.LastIndexOf('\n');
    if (lastNewline < 0) {
        return trimmed;
    }
    return trimmed.Substring(lastNewline + 1);
}
/// <summary>
/// Returns the leading part of the text up to and including its first period.
/// </summary>
/// <param name="text">Text to inspect.</param>
/// <returns>The substring ending in the first '.', or an empty string when the text has no period.</returns>
public static string returnSubstringEndedByPeriod(string text) {
    int periodAt = text.IndexOf('.');
    return periodAt < 0 ? string.Empty : text.Substring(0, periodAt + 1);
}
/// <summary>
/// Splits the text at the first delimiter into at most two parts. Useful for
/// parsing a line such as customer_name=Bob Miller with '=' as the delimiter.
/// </summary>
/// <remarks>
/// Later delimiters stay inside the second part, so "k=a=b" gives "k" and
/// "a=b". Fewer than two elements are returned when the text contains no
/// delimiter, or when empty entries are removed.
/// </remarks>
/// <param name="p">Text to split.</param>
/// <param name="_delimiter">Characters that act as delimiters.</param>
/// <param name="_removeEmptyEntries">True to drop empty parts from the result.</param>
/// <returns>An array holding no more than two strings.</returns>
public static string[] return2ElementStrArrayBySplitting(string p, char[] _delimiter, bool _removeEmptyEntries) {
    StringSplitOptions options = _removeEmptyEntries
        ? StringSplitOptions.RemoveEmptyEntries
        : StringSplitOptions.None;
    // The count argument caps the result at two substrings.
    return p.Split(_delimiter, 2, options);
}
/// <summary>
/// Splits the text into lines at every '\n'. Word wrapping is ignored and
/// blank (empty or whitespace-only) lines are kept.
/// </summary>
/// <param name="p">Text to split.</param>
/// <returns>The lines, in order; a trailing '\n' yields a final empty line.</returns>
public static string[] stringToLinesOfText(string p) {
    char[] lineBreak = { '\n' };
    return p.Split(lineBreak);
}
/// <summary>
/// Builds a sentence comparing the output file's size with the input file's size.
/// </summary>
/// <remarks>
/// Sizes are rendered through MyFiles.getFilesizeDesc (defined elsewhere;
/// presumably a human-readable size description - confirm against that helper).
/// </remarks>
/// <param name="inputFileLen">Size of the input file.</param>
/// <param name="outputFileLen">Size of the output file.</param>
/// <returns>
/// A sentence starting with "Your output file's size was ..., " followed by
/// how much smaller or larger it is than the input, or that it is the same size.
/// </returns>
public static string smallerLargerDesc(int inputFileLen, int outputFileLen) {
    string p = "Your output file's size was " + MyFiles.getFilesizeDesc(outputFileLen) + ", ";
    // Each branch appends to the opening clause; assigning would discard it.
    if (inputFileLen > outputFileLen) {
        p += MyFiles.getFilesizeDesc(inputFileLen - outputFileLen) + " smaller than your input file.";
    } else if (inputFileLen < outputFileLen) {
        p += MyFiles.getFilesizeDesc(outputFileLen - inputFileLen) + " larger than your ";
        p += MyFiles.getFilesizeDesc(inputFileLen) + " input file.";
    } else {
        p += "the same size as your input file.";
    }
    return p;
}
/// <summary>
/// Word-wraps the text for console output: words are packed greedily into
/// lines no longer than the given maximum, and each line is indented.
/// </summary>
/// <remarks>
/// Words are separated by ' ' and repeated spaces are ignored. The maximum
/// applies to the line content, not counting the indent. A single word longer
/// than the maximum is placed on a line of its own rather than being split.
/// Every line, including the last, ends with "\n".
/// </remarks>
/// <param name="p">Text to wrap.</param>
/// <param name="maxLineLen">Maximum number of characters per line, excluding the indent.</param>
/// <param name="leadSpace">Number of spaces put in front of every line; values of 0 or less mean no indent.</param>
/// <returns>The wrapped text, or an empty string when the input is empty or consists of spaces only.</returns>
public static string textToLines(string p, int maxLineLen, int leadSpace) {
    if (p.Trim(' ').Length == 0) {
        return String.Empty;
    }

    string indent = leadSpace > 0 ? new string(' ', leadSpace) : String.Empty;
    string[] words = p.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    StringBuilder output = new StringBuilder();
    StringBuilder line = new StringBuilder();

    foreach (string word in words) {
        // Flush BEFORE adding a word that would push the line past the limit,
        // so that no wrapped line ends up longer than maxLineLen.
        if (line.Length > 0 && line.Length + 1 + word.Length > maxLineLen) {
            output.Append(indent).Append(line.ToString()).Append("\n");
            line.Length = 0;
        }
        if (line.Length > 0) {
            line.Append(' ');
        }
        line.Append(word);
    }
    // The words array is non-empty here, so a final line is always pending.
    output.Append(indent).Append(line.ToString()).Append("\n");
    return output.ToString();
}
/// <summary>
/// Removes leading and trailing whitespace and lower-cases the text.
/// </summary>
/// <param name="s">Text to normalise.</param>
/// <returns>The trimmed, lower-cased text.</returns>
public static string trimAndLowercase(string s) {
    return s.Trim().ToLower();
}
/// <summary>
/// Counts the words in a passage, ignoring HTML-style tags and any words
/// enclosed in square brackets.
/// </summary>
/// <remarks>
/// Tags (text between angle brackets) are removed without inserting a space.
/// Words are separated by any whitespace. A token containing '[' starts a
/// bracketed section and a token containing ']' ends it; tokens inside the
/// section, and the bracket tokens themselves, are not counted.
/// </remarks>
/// <param name="Passage">Text to count words in.</param>
/// <returns>The number of counted words; 0 for empty or whitespace-only input.</returns>
public static int WordCount(string Passage) {
    if (Passage.Trim().Length == 0) {
        return 0;
    }

    // Drop anything that starts with '<', ends with '>' and has at least one character in between.
    string withoutTags = Regex.Replace(Passage, @"<(.|\n)+?>", "");

    // A null separator splits on every whitespace character (spaces, tabs, line
    // breaks); removing empty entries keeps gaps left by tags from being counted.
    string[] tokens = withoutTags.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

    int wordCount = 0;
    bool betweenBrackets = false;
    foreach (string token in tokens) {
        bool opens = token.Contains("[");
        bool closes = token.Contains("]");
        if (opens && closes) {
            // Self-contained bracketed token: ignored, neither counted nor subtracted.
            betweenBrackets = false;
        } else if (opens) {
            betweenBrackets = true;
        } else if (closes) {
            betweenBrackets = false;
        } else if (!betweenBrackets) {
            wordCount++;
        }
    }
    return wordCount;
}
/// <summary>
/// Surrounds the text with double quotation marks.
/// </summary>
/// <param name="p">Text to wrap; null is treated as empty.</param>
/// <returns>The text with a leading and a trailing '"'.</returns>
public static string wrapInQuotationMarks(string p) {
    const string quote = "\"";
    return string.Concat(quote, p, quote);
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment