Skip to content

Instantly share code, notes, and snippets.

View wolfgarbe's full-sized avatar
🎯
Focusing

Wolf Garbe wolfgarbe

🎯
Focusing
View GitHub Profile
@wolfgarbe
wolfgarbe / WordSegmentationDP.cs
Last active April 21, 2018 14:07
WordSegmentationDP: Word segmentation with Dynamic Programming
//MIT License: Copyright (c) 2018 Wolf Garbe
//https://github.com/wolfgarbe/WordSegmentationDP
/// <summary>Find best word segmentation for input string.</summary>
/// <param name="input">The string being word segmented.</param>
/// <param name="maxSegmentationWordLength">The maximum word length that should be considered.</param>
/// <returns>A tuple representing the suggested word segmented text and the sum of logarithmic word occurrence probabilities.</returns>
static (string segmentedString, decimal probabilityLogSum) WordSegmentationDP(string input, int maxSegmentationWordLength = 20, Dictionary<string, (string segmentedString, decimal probabilityLogSum)> cache = null)
{
//memoization: check whether the input has already been calculated; if so, return the result from the cache
if (cache == null) cache = new Dictionary<string, (string segmentedString, decimal probabilityLogSum)>();
@wolfgarbe
wolfgarbe / CompositionGeneration.cs
Last active April 19, 2018 12:10
Composition Generation
public void CompositionGeneration(string input, string composition=””)
{
for (int i=1;i<=input.Length;i++)
{
string part1 = input.Substring(0, i);
//recursion with the remainder of the string
if (part1.Length < input.Length)
CompositionGeneration(input.Substring(i),composition+part1+“ “);
//display composition
else Console.WriteLine(composition+part1);
@wolfgarbe
wolfgarbe / WordSegmentationTM.cs
Last active April 27, 2018 13:29
WordSegmentationTM: Fast Word segmentation with a Triangular Matrix
//MIT License: Copyright (c) 2018 Wolf Garbe
//https://github.com/wolfgarbe/WordSegmentationTM
/// <summary>Find best word segmentation for input string.</summary>
/// <param name="input">The string being word segmented.</param>
/// <param name="maxSegmentationWordLength">The maximum word length that should be considered.</param>
/// <returns>A tuple representing the suggested word segmented text and the sum of logarithmic word occurrence probabilities.</returns>
public static (string segmentedString, decimal probabilityLogSum) WordSegmentationTM(string input, int maxSegmentationWordLength = 20)
{
int arraySize = Math.Min(maxSegmentationWordLength, input.Length);
int arrayWidth = ((input.Length - 1) >> 6) + 1; // /64 bit