Skip to content

Instantly share code, notes, and snippets.

@kyrathasoft
Last active August 29, 2015 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kyrathasoft/5933ab0b9f125751bbe9 to your computer and use it in GitHub Desktop.
Save kyrathasoft/5933ab0b9f125751bbe9 to your computer and use it in GitHub Desktop.
Methods related to string manipulation in C#
using System;
using System.IO;
using System.Text.RegularExpressions;
using System.Text;
using com.williambryanmiller.testing;
using com.williambryanmiller.files;
namespace com.williambryanmiller.strings {
class MyStrings {
/* Note: the code from this class file is also available on GitHub Gist @
https://gist.github.com/kyrathasoft/5933ab0b9f125751bbe9 */
/// <summary>
/// Appends a period to the text unless its last non-whitespace character
/// is already '.', '?' or '!'.
/// </summary>
/// <param name="p">Text to inspect; trailing whitespace is ignored when looking for the final character.</param>
/// <returns>The text unchanged, or the text followed by ".".</returns>
public static string addEndingPeriodIfNoEndingPunctuation(string p) {
    // Same lookup getFinalNonSpaceCharacter performs: the last character
    // once trailing whitespace has been dropped.
    string withoutTrailing = p.TrimEnd();
    if (withoutTrailing.Length > 0) {
        char last = withoutTrailing[withoutTrailing.Length - 1];
        if (last == '.' || last == '?' || last == '!') {
            return p;
        }
    }
    // The period goes after the original text, including any trailing whitespace.
    return p + ".";
}
/// <summary>
/// Trims the sentence and upper-cases its first character.
/// </summary>
/// <param name="theSentence">Sentence to process; may be null, empty or whitespace-only.</param>
/// <returns>
/// The trimmed sentence with its first character upper-cased, or an empty
/// string when there is nothing but whitespace.
/// </returns>
public static string capitalizeFirstWordInSentence(string theSentence) {
    if (string.IsNullOrWhiteSpace(theSentence)) {
        return string.Empty;
    }
    string trimmed = theSentence.Trim();
    // The remainder is taken from the trimmed text; using the untrimmed length
    // here overruns the string as soon as the input has surrounding whitespace.
    return trimmed.Substring(0, 1).ToUpper() + trimmed.Substring(1);
}
/// <summary>
/// Upper-cases the first letter of the trimmed text and the character that
/// starts each following sentence.
/// </summary>
/// <remarks>
/// A sentence start is the character two positions after '.', '?' or '!'
/// when a single space follows the punctuation, or three positions after
/// when a CR LF pair follows it. Sentences preceded by a blank line are not
/// handled.
/// </remarks>
/// <param name="text">Text to process; may be null, empty or whitespace-only.</param>
/// <returns>The trimmed, capitalized text, or an empty string when there is no content.</returns>
public static string capitalizeFirstLetterOfEachSentence(string text) {
    // Guard: the first-character step below needs at least one character.
    if (string.IsNullOrWhiteSpace(text)) {
        return string.Empty;
    }
    char[] chars = text.Trim().ToCharArray();
    // Only ASCII a-z is shifted for the very first character.
    if (chars[0] >= 'a' && chars[0] <= 'z') {
        chars[0] = (char)(chars[0] - 32);
    }
    int last = chars.Length - 1;
    for (int i = 0; i < chars.Length; i++) {
        if (chars[i] != '.' && chars[i] != '?' && chars[i] != '!') {
            continue;
        }
        // "<punctuation><space><letter>"
        if (i + 2 <= last && chars[i + 1] == ' ' && chars[i + 2] != '\r') {
            chars[i + 2] = char.ToUpper(chars[i + 2]);
        }
        // "<punctuation>\r\n<letter>"
        if (i + 3 <= last && chars[i + 1] == '\r' && chars[i + 2] == '\n') {
            chars[i + 3] = char.ToUpper(chars[i + 3]);
        }
    }
    return new string(chars);
}
/// <summary>
/// Lower-cases the text, then upper-cases the first character of every
/// space-separated word and joins the words with single spaces.
/// </summary>
/// <param name="the_string">Text to convert.</param>
/// <returns>The converted text, or an empty string for whitespace-only input.</returns>
public static string capitalizeWordsInString(string the_string) {
    if (the_string.Trim().Length == 0) {
        return string.Empty;
    }
    StringBuilder joined = new StringBuilder();
    foreach (string piece in the_string.ToLower().Split(' ')) {
        // Consecutive spaces produce empty pieces; those are skipped.
        string word = piece.Trim();
        if (word.Length == 0) {
            continue;
        }
        if (joined.Length > 0) {
            joined.Append(' ');
        }
        joined.Append(word.Substring(0, 1).ToUpper());
        joined.Append(word.Substring(1));
    }
    return joined.ToString();
}
/// <summary>
/// Joins the elements with single spaces, except that the final element
/// (for example a period) is attached directly to the one before it.
/// </summary>
/// <param name="myArray">Words followed by a final element to attach; may be null or empty.</param>
/// <returns>
/// The joined text; an empty string for a null or empty array; the sole
/// element for a one-element array.
/// </returns>
public static string connectLastWordAndEndingPeriod(string[] myArray) {
    if (myArray == null || myArray.Length == 0) {
        return String.Empty;
    }
    if (myArray.Length == 1) {
        return myArray[0] ?? String.Empty;
    }
    int lastWord = myArray.Length - 2;
    StringBuilder result = new StringBuilder();
    // Every element before the last word is followed by a space; stopping
    // one element earlier would drop that element from the output.
    for (int i = 0; i < lastWord; i++) {
        result.Append(myArray[i]).Append(' ');
    }
    result.Append(myArray[lastWord]);
    result.Append(myArray[lastWord + 1]);
    return result.ToString();
}
/// <summary>
/// Replaces every run of one or more whitespace characters with a single space.
/// </summary>
/// <param name="value">Text to normalize.</param>
/// <returns>The text with each whitespace run collapsed to one space.</returns>
public static string convertWhitespacesToSingleSpace(string value) {
    Regex whitespaceRun = new Regex(@"\s+");
    return whitespaceRun.Replace(value, " ");
}
/// <summary>
/// Returns the last character of the text once trailing whitespace is ignored.
/// </summary>
/// <param name="p">Text to inspect.</param>
/// <returns>A one-character string, or an empty string when nothing but whitespace remains.</returns>
public static string getFinalNonSpaceCharacter(string p) {
    string trimmed = p.TrimEnd();
    if (trimmed.Length == 0) {
        return string.Empty;
    }
    return trimmed[trimmed.Length - 1].ToString();
}
/// <summary>
/// Looks for a period, then a space, then a character that
/// MyTests.isNumeric accepts.
/// </summary>
/// <param name="p">Text to scan; may be null or empty.</param>
/// <returns>
/// The index of the space in the first such occurrence, or -1 when there is none.
/// </returns>
public static int hasPeriodFollowedBySpaceFollowedByNumeric(string p) {
    if (string.IsNullOrEmpty(p)) {
        return -1;
    }
    // Every ". " is examined, not just the first one, so a match later in
    // the text is still reported.
    int index = p.IndexOf(". ", StringComparison.Ordinal);
    while (index >= 0 && index + 2 < p.Length) {
        if (MyTests.isNumeric(p[index + 2])) {
            return index + 1;
        }
        index = p.IndexOf(". ", index + 1, StringComparison.Ordinal);
    }
    return -1;
}
/// <summary>
/// Counts the characters of the text for which MyTests.isAlphanumeric returns true.
/// </summary>
/// <param name="p">Text to scan.</param>
/// <returns>The number of accepted characters.</returns>
public static int numAlphanumerics(string p) {
    int count = 0;
    for (int i = 0; i < p.Length; i++) {
        if (MyTests.isAlphanumeric(p[i])) {
            count++;
        }
    }
    return count;
}
/// <summary>
/// Counts the lines (split on '\n') that are empty or contain only whitespace.
/// </summary>
/// <param name="p">Text to scan.</param>
/// <returns>The number of blank lines; the piece after a trailing newline counts as a line.</returns>
public static int numBlankLinesInString(string p) {
    string[] rows = p.Split('\n');
    int blankCount = 0;
    for (int i = 0; i < rows.Length; i++) {
        if (rows[i].Trim().Length > 0) {
            continue;
        }
        blankCount++;
    }
    return blankCount;
}
/// <summary>
/// Counts the lines of the text, where lines are separated by '\n'.
/// </summary>
/// <param name="p">Text to scan.</param>
/// <returns>One more than the number of '\n' characters; an empty string counts as one line.</returns>
public static int numLinesInString(string p) {
    int lines = 1;
    foreach (char c in p) {
        if (c == '\n') {
            lines++;
        }
    }
    return lines;
}
/// <summary>
/// Counts the characters of the text that are not whitespace.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <returns>The number of non-whitespace characters.</returns>
public static int numNonSpaceCharacters(string text) {
    int count = 0;
    foreach (char c in text) {
        if (Char.IsWhiteSpace(c)) {
            continue;
        }
        count++;
    }
    return count;
}
/// <summary>
/// Counts the whitespace characters of the text (any character for which
/// Char.IsWhiteSpace is true, not only ' ').
/// </summary>
/// <param name="text">Text to scan.</param>
/// <returns>The number of whitespace characters.</returns>
public static int numSpaceChars(string text) {
    int count = 0;
    foreach (char c in text) {
        if (Char.IsWhiteSpace(c)) {
            count++;
        }
    }
    return count;
}
/// <summary>
/// Counts the whitespace-separated words of the text.
/// </summary>
/// <param name="p">Text to scan; may be null, empty or whitespace-only.</param>
/// <returns>The number of words; 0 when there is no content.</returns>
public static int numWords(string p) {
    if (string.IsNullOrWhiteSpace(p)) {
        return 0;
    }
    // A null separator splits on whitespace; empty entries are dropped so
    // repeated, leading or trailing spaces do not inflate the count.
    string[] words = p.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
    return words.Length;
}
/// <summary>
/// Counts the words whose length is at least the minimum, after removing
/// at most one trailing '.', then ',', then ':', then ';' (checked in that order).
/// </summary>
/// <param name="words">Words to examine; the array is not modified. Null entries are skipped.</param>
/// <param name="minLength">Minimum length a word must have to be counted.</param>
/// <returns>The number of qualifying words; 0 for a null array.</returns>
public static int numWordsOfMinimumLength(string[] words, int minLength) {
    if (words == null) {
        return 0;
    }
    char[] trailingMarks = { '.', ',', ':', ';' };
    int result = 0;
    foreach (string original in words) {
        if (original == null) {
            continue;
        }
        // Work on a local copy so the caller's array keeps its punctuation.
        string word = original;
        foreach (char mark in trailingMarks) {
            if (word.Length > 0 && word[word.Length - 1] == mark) {
                word = word.Substring(0, word.Length - 1);
            }
        }
        if (word.Length >= minLength) {
            result++;
        }
    }
    return result;
}
/// <summary>
/// Formats an integer with its English ordinal suffix, e.g. 1 gives "1st",
/// 2 gives "2nd", 7 gives "7th", 11 gives "11th".
/// </summary>
/// <param name="_num">Number to format.</param>
/// <returns>The number followed by "st", "nd", "rd" or "th"; negative numbers always get "th".</returns>
public static string nthToStringWithOrdinalSuffix(int _num) {
    string suffix = "th";
    if (_num >= 0) {
        int lastTwoDigits = _num % 100;
        // 11, 12 and 13 (and 111, 212, ...) take "th" despite ending in 1, 2 or 3.
        bool isTeen = lastTwoDigits >= 11 && lastTwoDigits <= 13;
        if (!isTeen) {
            switch (_num % 10) {
                case 1:
                    suffix = "st";
                    break;
                case 2:
                    suffix = "nd";
                    break;
                case 3:
                    suffix = "rd";
                    break;
            }
        }
    }
    return _num.ToString() + suffix;
}
/// <summary>
/// Splits the text into words after collapsing every whitespace run to a
/// single space.
/// </summary>
/// <param name="p">Text to split.</param>
/// <returns>The non-empty words, in order.</returns>
public static string[] parseWordsViaSinglespaceDelimeter(string p) {
    // Same normalization convertWhitespacesToSingleSpace applies.
    string collapsed = Regex.Replace(p, @"\s+", " ");
    char[] delimiter = { ' ' };
    return collapsed.Split(delimiter, StringSplitOptions.RemoveEmptyEntries);
}
/// <summary>
/// Counts the periods that are immediately followed by a character other
/// than ' '. A period that is the last character is not counted.
/// </summary>
/// <param name="p">Text to scan.</param>
/// <returns>The number of such periods.</returns>
public static int periodFollowedByNonSpace(string p) {
    int count = 0;
    // Walk the "following" position so the preceding character always exists.
    for (int next = 1; next < p.Length; next++) {
        if (p[next - 1] == '.' && p[next] != ' ') {
            count++;
        }
    }
    return count;
}
/// <summary>
/// Removes the last line (text after the final '\n') from the trimmed text.
/// </summary>
/// <remarks>
/// The text is trimmed first, matching returnLastLineOfText, so a trailing
/// newline does not count as an empty last line.
/// </remarks>
/// <param name="p">Text to shorten; may be null.</param>
/// <returns>
/// Everything before the final '\n' of the trimmed text, or an empty string
/// when the trimmed text has a single line.
/// </returns>
public static string removeLastLineOfText(string p) {
    if (p == null) {
        return string.Empty;
    }
    string trimmed = p.Trim();
    int lastNewline = trimmed.LastIndexOf('\n');
    if (lastNewline < 0) {
        return string.Empty;
    }
    return trimmed.Substring(0, lastNewline);
}
/// <summary>
/// Collapses whitespace runs to a single space and trims each line.
/// </summary>
/// <param name="lines_of_text">Lines to clean; the array is updated in place.</param>
/// <returns>The same array instance that was passed in.</returns>
public static string[] removeExtraWhitespaceWithinEachLine(string[] lines_of_text) {
    for (int row = 0; row < lines_of_text.Length; row++) {
        // Same normalization convertWhitespacesToSingleSpace applies.
        string collapsed = Regex.Replace(lines_of_text[row], @"\s+", " ");
        lines_of_text[row] = collapsed.Trim();
    }
    return lines_of_text;
}
/// <summary>
/// Strips up to three trailing punctuation marks and up to three leading
/// quote or parenthesis characters from the text.
/// </summary>
/// <remarks>
/// Each of the three passes removes at most one trailing character out of
/// ! . , ? ; : " ) and then at most one leading character out of " (.
/// </remarks>
/// <param name="p">Text to strip.</param>
/// <returns>The stripped text.</returns>
public static string removePrecedingOrEndingPunctuationInString(string p) {
    string[] trailingMarks = { "!", ".", ",", "?", ";", ":", "\"", ")" };
    string[] leadingMarks = { "\"", "(" };
    string current = p;
    for (int pass = 0; pass < 3; pass++) {
        foreach (string mark in trailingMarks) {
            if (current.EndsWith(mark)) {
                current = current.Substring(0, current.Length - 1);
                break;
            }
        }
        foreach (string mark in leadingMarks) {
            if (current.StartsWith(mark)) {
                current = current.Substring(1);
                break;
            }
        }
    }
    return current;
}
/// <summary>
/// Replaces every character that MyTests.isAlphanumeric rejects with the
/// given replacement character.
/// </summary>
/// <param name="p">Text to process.</param>
/// <param name="replacement">Character substituted for each rejected character.</param>
/// <returns>A string of the same length with the substitutions applied.</returns>
public static string replaceNonAlphanumericsWith(string p, char replacement) {
    char[] buffer = p.ToCharArray();
    for (int i = 0; i < buffer.Length; i++) {
        if (!MyTests.isAlphanumeric(buffer[i])) {
            buffer[i] = replacement;
        }
    }
    return new string(buffer);
}
/// <summary>
/// Returns the last line of the trimmed text, where lines are separated by '\n'.
/// </summary>
/// <param name="p">Text to inspect.</param>
/// <returns>The text after the final '\n' of the trimmed input, or the whole trimmed input when it has no '\n'.</returns>
public static string returnLastLineOfText(string p) {
    string trimmed = p.Trim();
    int lastNewline = trimmed.LastIndexOf('\n');
    if (lastNewline < 0) {
        return trimmed;
    }
    return trimmed.Substring(lastNewline + 1);
}
/// <summary>
/// Returns the leading part of the text up to and including its first period.
/// </summary>
/// <param name="text">Text to inspect.</param>
/// <returns>The substring ending at the first '.', or an empty string when the text has no period.</returns>
public static string returnSubstringEndedByPeriod(string text) {
    int firstPeriod = text.IndexOf('.');
    return firstPeriod < 0 ? string.Empty : text.Substring(0, firstPeriod + 1);
}
/// <summary>
/// Splits the text at the first delimiter into at most two parts. Useful
/// for lines such as "customer_name=Bob Miller" with '=' as the delimiter.
/// </summary>
/// <param name="p">Text to split.</param>
/// <param name="_delimiter">Characters that separate the two parts.</param>
/// <param name="_removeEmptyEntries">True to drop empty parts from the result.</param>
/// <returns>
/// An array of at most two elements; later delimiters stay inside the second
/// element. Fewer than two elements are returned when the text has no
/// delimiter or empty parts were removed.
/// </returns>
public static string[] return2ElementStrArrayBySplitting(string p, char[] _delimiter, bool _removeEmptyEntries) {
    StringSplitOptions options = _removeEmptyEntries
        ? StringSplitOptions.RemoveEmptyEntries
        : StringSplitOptions.None;
    // Limiting the split to two substrings keeps values that themselves
    // contain the delimiter in one piece.
    return p.Split(_delimiter, 2, options);
}
/// <summary>
/// Splits the text into lines at every '\n'. Word wrapping is ignored, and
/// lines may be blank (empty or whitespace only).
/// </summary>
/// <param name="p">Text to split.</param>
/// <returns>The lines, in order, including empty ones.</returns>
public static string[] stringToLinesOfText(string p) {
    char[] newline = { '\n' };
    return p.Split(newline);
}
/// <summary>
/// Builds a sentence comparing the output file size with the input file size.
/// </summary>
/// <param name="inputFileLen">Size of the input file.</param>
/// <param name="outputFileLen">Size of the output file.</param>
/// <returns>
/// A sentence that starts with "Your output file's size was ..." and goes on
/// to say whether that is smaller than, larger than, or the same size as the
/// input file. Sizes are rendered by MyFiles.getFilesizeDesc.
/// </returns>
public static string smallerLargerDesc(int inputFileLen, int outputFileLen) {
    string p = "Your output file's size was " + MyFiles.getFilesizeDesc(outputFileLen) + ", ";
    // Each branch appends to the opening clause; assigning instead would
    // throw the opening clause away.
    if (inputFileLen > outputFileLen) {
        p += MyFiles.getFilesizeDesc(inputFileLen - outputFileLen) + " smaller than your input file.";
    } else if (inputFileLen < outputFileLen) {
        p += MyFiles.getFilesizeDesc(outputFileLen - inputFileLen) + " larger than your ";
        p += MyFiles.getFilesizeDesc(inputFileLen) + " input file.";
    } else {
        p += "the same size as your input file.";
    }
    return p;
}
/// <summary>
/// Breaks the text into indented lines for display in a console app.
/// </summary>
/// <remarks>
/// Words are the pieces between ' ' characters; each is appended followed by
/// a space. A line is emitted once its length exceeds maxLineLen or the last
/// word has been added, so an emitted line can be longer than maxLineLen.
/// Every emitted line ends with "\n".
/// </remarks>
/// <param name="p">Text to format.</param>
/// <param name="maxLineLen">Length a line must exceed before it is emitted.</param>
/// <param name="leadSpace">Number of spaces put in front of each line; values of 0 or less add none.</param>
/// <returns>
/// The formatted text, or an empty string when the input is empty or
/// MyTests.containsOnlySpaces reports true for it.
/// </returns>
public static string textToLines(string p, int maxLineLen, int leadSpace) {
    if (p.Length == 0 || MyTests.containsOnlySpaces(p)) {
        return String.Empty;
    }
    string indent = leadSpace > 0 ? new string(' ', leadSpace) : String.Empty;
    StringBuilder output = new StringBuilder();
    StringBuilder line = new StringBuilder();
    string[] words = p.Split(' ');
    for (int i = 0; i < words.Length; i++) {
        line.Append(words[i]).Append(' ');
        bool isLastWord = (i == words.Length - 1);
        if (line.Length > maxLineLen || isLastWord) {
            output.Append(indent).Append(line.ToString()).Append('\n');
            line.Length = 0; // start the next line empty
        }
    }
    return output.ToString();
}
/// <summary>
/// Removes leading and trailing whitespace and lower-cases the text.
/// </summary>
/// <param name="s">Text to normalize.</param>
/// <returns>The trimmed, lower-cased text.</returns>
public static string trimAndLowercase(string s) {
    string trimmed = s.Trim();
    return trimmed.ToLower();
}
/// <summary>
/// Counts the words of a passage, ignoring markup tags and anything written
/// between square brackets.
/// </summary>
/// <remarks>
/// Carriage returns, line feeds, vertical tabs and the literal "&amp;nbsp;"
/// are turned into spaces, angle-bracket tags are removed, and the remainder
/// is split on whitespace. A word containing '[' starts a bracketed section
/// and a word containing ']' ends it; neither those words nor the words
/// between them are counted.
/// </remarks>
/// <param name="Passage">Text to count; may be null, empty or whitespace-only.</param>
/// <returns>The number of counted words; never negative.</returns>
public static int WordCount(string Passage) {
    if (string.IsNullOrWhiteSpace(Passage)) {
        return 0;
    }
    // Normalize line breaks, vertical tabs and HTML non-breaking spaces.
    string temp = Passage.Replace((char)13, ' ');
    temp = temp.Replace((char)10, ' ');
    temp = temp.Replace((char)11, ' ');
    temp = temp.Replace("&nbsp;", " ");
    // Remove anything that starts with < and ends with > with at least one character inside.
    temp = Regex.Replace(temp, @"<(.|\n)+?>", "");
    // Split on whitespace and drop empty entries, so gaps left behind by
    // removed tags are not counted as words.
    string[] words = temp.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
    int word_count = 0;
    bool btwn_brackets = false;
    foreach (string word in words) {
        bool opens = word.Contains("[");
        bool closes = word.Contains("]");
        if (opens && closes) {
            // A single bracketed word is skipped; it must not reduce the count.
            btwn_brackets = false;
        } else if (opens) {
            btwn_brackets = true;
        } else if (closes) {
            btwn_brackets = false;
        } else if (!btwn_brackets) {
            word_count++;
        }
    }
    return word_count;
}
/// <summary>
/// Surrounds the text with double quotation marks.
/// </summary>
/// <param name="p">Text to wrap.</param>
/// <returns>The text with a '"' added before and after it.</returns>
public static string wrapInQuotationMarks(string p) {
    const string quote = "\"";
    return string.Concat(quote, p, quote);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment