This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//MIT License: Copyright (c) 2018 Wolf Garbe | |
//https://github.com/wolfgarbe/WordSegmentationTM | |
/// <summary>Find best word segmentation for input string.</summary> | |
/// <param name="input">The string being word segmented.</param> | |
/// <param name="maxSegmentationWordLength">The maximum word length that should be considered.</param> | |
/// <returns>A tuple representing the suggested word segmented text and the sum of logarithmic word occurrence probabilities.</returns>
public static (string segmentedString, decimal probabilityLogSum) WordSegmentationTM(string input, int maxSegmentationWordLength = 20) | |
{ | |
int arraySize = Math.Min(maxSegmentationWordLength, input.Length); | |
int arrayWidth = ((input.Length - 1) >> 6) + 1; // /64 bit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public void CompositionGeneration(string input, string composition=””) | |
{ | |
for (int i=1;i<=input.Length;i++) | |
{ | |
string part1 = input.Substring(0, i); | |
//recursion with the remainder of the string | |
if (part1.Length < input.Length) | |
CompositionGeneration(input.Substring(i),composition+part1+“ “); | |
//display composition | |
else Console.WriteLine(composition+part1); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//MIT License: Copyright (c) 2018 Wolf Garbe | |
//https://github.com/wolfgarbe/WordSegmentationDP | |
/// <summary>Find best word segmentation for input string.</summary> | |
/// <param name="input">The string being word segmented.</param> | |
/// <param name="maxSegmentationWordLength">The maximum word length that should be considered.</param> | |
/// <returns>A tuple representing the suggested word segmented text and the sum of logarithmic word occurrence probabilities.</returns>
static (string segmentedString, decimal probabilityLogSum) WordSegmentationDP(string input, int maxSegmentationWordLength = 20, Dictionary<string, (string segmentedString, decimal probabilityLogSum)> cache = null) | |
{ | |
//memoization: check whether the input has already been calculated; if yes, then return the result from the cache
if (cache == null) cache = new Dictionary<string, (string segmentedString, decimal probabilityLogSum)>(); |