Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save smdn/f062ea075ec83964079e437e282f4746 to your computer and use it in GitHub Desktop.
Save smdn/f062ea075ec83964079e437e282f4746 to your computer and use it in GitHub Desktop.
Smdn.Text.Ondulish 4.0.0-preview1 Release Notes

main/Smdn.Text.Ondulish-4.0.0-preview1

diff --git a/doc/api-list/Smdn.Text.Ondulish/Smdn.Text.Ondulish-net6.0.apilist.cs b/doc/api-list/Smdn.Text.Ondulish/Smdn.Text.Ondulish-net6.0.apilist.cs
new file mode 100644
index 0000000..4d6157c
--- /dev/null
+++ b/doc/api-list/Smdn.Text.Ondulish/Smdn.Text.Ondulish-net6.0.apilist.cs
@@ -0,0 +1,223 @@
+// Smdn.Text.Ondulish.dll (Smdn.Text.Ondulish-4.0.0-preview1)
+// Name: Smdn.Text.Ondulish
+// AssemblyVersion: 4.0.0.0
+// InformationalVersion: 4.0.0-preview1+e47f0cd7079b79c34ed00e252d68a337f739d938
+// TargetFramework: .NETCoreApp,Version=v6.0
+// Configuration: Release
+#nullable enable annotations
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using MeCab;
+
+namespace MeCab {
+ public class DictionaryInfo : IDisposable {
+ protected bool swigCMemOwn;
+
+ public DictionaryInfo() {}
+
+ public string charset { get; }
+ public string filename { get; }
+ public uint lsize { get; }
+ public DictionaryInfo next { get; }
+ public uint rsize { get; }
+ public uint size { get; }
+ public int type { get; }
+ public ushort version { get; }
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~DictionaryInfo() {}
+ }
+
+ public class Lattice : IDisposable {
+ protected bool swigCMemOwn;
+
+ public Lattice() {}
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~Lattice() {}
+ public virtual double Z() {}
+ public virtual void add_request_type(int request_type) {}
+ public virtual Node begin_nodes(uint pos) {}
+ public virtual Node bos_node() {}
+ public virtual int boundary_constraint(uint pos) {}
+ public virtual void clear() {}
+ public virtual Node end_nodes(uint pos) {}
+ public virtual string enumNBestAsString(uint N) {}
+ public virtual Node eos_node() {}
+ public virtual string feature_constraint(uint pos) {}
+ public virtual bool has_constraint() {}
+ public virtual bool has_request_type(int request_type) {}
+ public virtual bool is_available() {}
+ public virtual Node newNode() {}
+ public virtual bool next() {}
+ public virtual void remove_request_type(int request_type) {}
+ public virtual int request_type() {}
+ public virtual string sentence() {}
+ public virtual void set_Z(double Z) {}
+ public virtual void set_boundary_constraint(uint pos, int boundary_constraint_type) {}
+ public virtual void set_feature_constraint(uint begin_pos, uint end_pos, string feature) {}
+ public virtual void set_request_type(int request_type) {}
+ public virtual void set_result(string result) {}
+ public void set_sentence(string sentence) {}
+ public virtual void set_theta(float theta) {}
+ public virtual void set_what(string str) {}
+ public virtual uint size() {}
+ public virtual float theta() {}
+ public virtual string toString() {}
+ public virtual string toString(Node node) {}
+ public virtual string what() {}
+ }
+
+ public class MeCab {
+ public static readonly int MECAB_ALLOCATE_SENTENCE = 64;
+ public static readonly int MECAB_ALL_MORPHS = 32;
+ public static readonly int MECAB_ALTERNATIVE = 16;
+ public static readonly int MECAB_ANY_BOUNDARY = 0;
+ public static readonly int MECAB_BOS_NODE = 2;
+ public static readonly int MECAB_EON_NODE = 4;
+ public static readonly int MECAB_EOS_NODE = 3;
+ public static readonly int MECAB_INSIDE_TOKEN = 2;
+ public static readonly int MECAB_MARGINAL_PROB = 8;
+ public static readonly int MECAB_NBEST = 2;
+ public static readonly int MECAB_NOR_NODE = 0;
+ public static readonly int MECAB_ONE_BEST = 1;
+ public static readonly int MECAB_PARTIAL = 4;
+ public static readonly int MECAB_SYS_DIC = 0;
+ public static readonly int MECAB_TOKEN_BOUNDARY = 1;
+ public static readonly int MECAB_UNK_DIC = 2;
+ public static readonly int MECAB_UNK_NODE = 1;
+ public static readonly int MECAB_USR_DIC = 1;
+ public static readonly string VERSION = "0.996";
+
+ public MeCab() {}
+ }
+
+ public class Model : IDisposable {
+ public static string version() {}
+
+ protected bool swigCMemOwn;
+
+ public Model() {}
+ public Model(string argc) {}
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~Model() {}
+ public virtual Lattice createLattice() {}
+ public virtual Tagger createTagger() {}
+ public virtual DictionaryInfo dictionary_info() {}
+ public virtual Node lookup(string begin, string end, Lattice lattice) {}
+ public virtual bool swap(Model model) {}
+ public virtual int transition_cost(ushort rcAttr, ushort lcAttr) {}
+ }
+
+ public class Node : IDisposable {
+ protected bool swigCMemOwn;
+
+ public float alpha { get; }
+ public float beta { get; }
+ public Node bnext { get; }
+ public byte char_type { get; }
+ public int cost { get; }
+ public Node enext { get; }
+ public string feature { get; }
+ public uint id { get; }
+ public byte isbest { get; }
+ public ushort lcAttr { get; }
+ public ushort length { get; }
+ public Path lpath { get; }
+ public Node next { get; }
+ public ushort posid { get; }
+ public Node prev { get; }
+ public float prob { get; set; }
+ public ushort rcAttr { get; }
+ public ushort rlength { get; }
+ public Path rpath { get; }
+ public byte stat { get; }
+ public string surface { get; }
+ public short wcost { get; }
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~Node() {}
+ }
+
+ public class Path : IDisposable {
+ protected bool swigCMemOwn;
+
+ public int cost { get; }
+ public Path lnext { get; }
+ public Node lnode { get; }
+ public float prob { get; set; }
+ public Path rnext { get; }
+ public Node rnode { get; }
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~Path() {}
+ }
+
+ public class Tagger : IDisposable {
+ public static bool parse(Model model, Lattice lattice) {}
+ public static string version() {}
+
+ protected bool swigCMemOwn;
+
+ public Tagger() {}
+ public Tagger(string argc) {}
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~Tagger() {}
+ public virtual bool all_morphs() {}
+ public virtual DictionaryInfo dictionary_info() {}
+ public virtual string formatNode(Node node) {}
+ public virtual int lattice_level() {}
+ public virtual string next() {}
+ public virtual Node nextNode() {}
+ public virtual bool parse(Lattice lattice) {}
+ public virtual string parse(string str) {}
+ public virtual string parseNBest(uint N, string str) {}
+ public virtual bool parseNBestInit(string str) {}
+ public virtual Node parseToNode(string str) {}
+ public string parseToString(string str) {}
+ public string parseToString(string str, uint length) {}
+ public virtual bool partial() {}
+ public virtual int request_type() {}
+ public virtual void set_all_morphs(bool all_morphs) {}
+ public virtual void set_lattice_level(int level) {}
+ public virtual void set_partial(bool @partial) {}
+ public virtual void set_request_type(int request_type) {}
+ public virtual void set_theta(float theta) {}
+ public virtual float theta() {}
+ public virtual string what() {}
+ }
+}
+
+namespace Smdn.Text.Ondulish {
+ public static class KanaUtils {
+ public static string ConvertWideHiraganaToKatakana(string input) {}
+ public static string ConvertWideKatakanaToHiragana(string input) {}
+ public static string ConvertWideKatakanaToNarrowKatakana(string input) {}
+ }
+
+ public class Translator : IDisposable {
+ public static Tagger CreateTaggerForBundledDictionary() {}
+
+ public Translator() {}
+ public Translator(Tagger tagger, bool shouldDisposeTagger) {}
+
+ public IReadOnlyDictionary<string, string> PhraseDictionary { get; }
+ public IReadOnlyDictionary<string, string> WordDictionary { get; }
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ public string Translate(string input, bool convertKatakanaToNarrow = true) {}
+ public void Translate(TextReader input, TextWriter output, bool convertKatakanaToNarrow = true) {}
+ public void Translate(string input, TextWriter output, bool convertKatakanaToNarrow = true) {}
+ }
+}
diff --git a/doc/api-list/Smdn.Text.Ondulish/Smdn.Text.Ondulish-netstandard2.1.apilist.cs b/doc/api-list/Smdn.Text.Ondulish/Smdn.Text.Ondulish-netstandard2.1.apilist.cs
new file mode 100644
index 0000000..17e77b7
--- /dev/null
+++ b/doc/api-list/Smdn.Text.Ondulish/Smdn.Text.Ondulish-netstandard2.1.apilist.cs
@@ -0,0 +1,223 @@
+// Smdn.Text.Ondulish.dll (Smdn.Text.Ondulish-4.0.0-preview1)
+// Name: Smdn.Text.Ondulish
+// AssemblyVersion: 4.0.0.0
+// InformationalVersion: 4.0.0-preview1+e47f0cd7079b79c34ed00e252d68a337f739d938
+// TargetFramework: .NETStandard,Version=v2.1
+// Configuration: Release
+#nullable enable annotations
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using MeCab;
+
+namespace MeCab {
+ public class DictionaryInfo : IDisposable {
+ protected bool swigCMemOwn;
+
+ public DictionaryInfo() {}
+
+ public string charset { get; }
+ public string filename { get; }
+ public uint lsize { get; }
+ public DictionaryInfo next { get; }
+ public uint rsize { get; }
+ public uint size { get; }
+ public int type { get; }
+ public ushort version { get; }
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~DictionaryInfo() {}
+ }
+
+ public class Lattice : IDisposable {
+ protected bool swigCMemOwn;
+
+ public Lattice() {}
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~Lattice() {}
+ public virtual double Z() {}
+ public virtual void add_request_type(int request_type) {}
+ public virtual Node begin_nodes(uint pos) {}
+ public virtual Node bos_node() {}
+ public virtual int boundary_constraint(uint pos) {}
+ public virtual void clear() {}
+ public virtual Node end_nodes(uint pos) {}
+ public virtual string enumNBestAsString(uint N) {}
+ public virtual Node eos_node() {}
+ public virtual string feature_constraint(uint pos) {}
+ public virtual bool has_constraint() {}
+ public virtual bool has_request_type(int request_type) {}
+ public virtual bool is_available() {}
+ public virtual Node newNode() {}
+ public virtual bool next() {}
+ public virtual void remove_request_type(int request_type) {}
+ public virtual int request_type() {}
+ public virtual string sentence() {}
+ public virtual void set_Z(double Z) {}
+ public virtual void set_boundary_constraint(uint pos, int boundary_constraint_type) {}
+ public virtual void set_feature_constraint(uint begin_pos, uint end_pos, string feature) {}
+ public virtual void set_request_type(int request_type) {}
+ public virtual void set_result(string result) {}
+ public void set_sentence(string sentence) {}
+ public virtual void set_theta(float theta) {}
+ public virtual void set_what(string str) {}
+ public virtual uint size() {}
+ public virtual float theta() {}
+ public virtual string toString() {}
+ public virtual string toString(Node node) {}
+ public virtual string what() {}
+ }
+
+ public class MeCab {
+ public static readonly int MECAB_ALLOCATE_SENTENCE = 64;
+ public static readonly int MECAB_ALL_MORPHS = 32;
+ public static readonly int MECAB_ALTERNATIVE = 16;
+ public static readonly int MECAB_ANY_BOUNDARY = 0;
+ public static readonly int MECAB_BOS_NODE = 2;
+ public static readonly int MECAB_EON_NODE = 4;
+ public static readonly int MECAB_EOS_NODE = 3;
+ public static readonly int MECAB_INSIDE_TOKEN = 2;
+ public static readonly int MECAB_MARGINAL_PROB = 8;
+ public static readonly int MECAB_NBEST = 2;
+ public static readonly int MECAB_NOR_NODE = 0;
+ public static readonly int MECAB_ONE_BEST = 1;
+ public static readonly int MECAB_PARTIAL = 4;
+ public static readonly int MECAB_SYS_DIC = 0;
+ public static readonly int MECAB_TOKEN_BOUNDARY = 1;
+ public static readonly int MECAB_UNK_DIC = 2;
+ public static readonly int MECAB_UNK_NODE = 1;
+ public static readonly int MECAB_USR_DIC = 1;
+ public static readonly string VERSION = "0.996";
+
+ public MeCab() {}
+ }
+
+ public class Model : IDisposable {
+ public static string version() {}
+
+ protected bool swigCMemOwn;
+
+ public Model() {}
+ public Model(string argc) {}
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~Model() {}
+ public virtual Lattice createLattice() {}
+ public virtual Tagger createTagger() {}
+ public virtual DictionaryInfo dictionary_info() {}
+ public virtual Node lookup(string begin, string end, Lattice lattice) {}
+ public virtual bool swap(Model model) {}
+ public virtual int transition_cost(ushort rcAttr, ushort lcAttr) {}
+ }
+
+ public class Node : IDisposable {
+ protected bool swigCMemOwn;
+
+ public float alpha { get; }
+ public float beta { get; }
+ public Node bnext { get; }
+ public byte char_type { get; }
+ public int cost { get; }
+ public Node enext { get; }
+ public string feature { get; }
+ public uint id { get; }
+ public byte isbest { get; }
+ public ushort lcAttr { get; }
+ public ushort length { get; }
+ public Path lpath { get; }
+ public Node next { get; }
+ public ushort posid { get; }
+ public Node prev { get; }
+ public float prob { get; set; }
+ public ushort rcAttr { get; }
+ public ushort rlength { get; }
+ public Path rpath { get; }
+ public byte stat { get; }
+ public string surface { get; }
+ public short wcost { get; }
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~Node() {}
+ }
+
+ public class Path : IDisposable {
+ protected bool swigCMemOwn;
+
+ public int cost { get; }
+ public Path lnext { get; }
+ public Node lnode { get; }
+ public float prob { get; set; }
+ public Path rnext { get; }
+ public Node rnode { get; }
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~Path() {}
+ }
+
+ public class Tagger : IDisposable {
+ public static bool parse(Model model, Lattice lattice) {}
+ public static string version() {}
+
+ protected bool swigCMemOwn;
+
+ public Tagger() {}
+ public Tagger(string argc) {}
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ ~Tagger() {}
+ public virtual bool all_morphs() {}
+ public virtual DictionaryInfo dictionary_info() {}
+ public virtual string formatNode(Node node) {}
+ public virtual int lattice_level() {}
+ public virtual string next() {}
+ public virtual Node nextNode() {}
+ public virtual bool parse(Lattice lattice) {}
+ public virtual string parse(string str) {}
+ public virtual string parseNBest(uint N, string str) {}
+ public virtual bool parseNBestInit(string str) {}
+ public virtual Node parseToNode(string str) {}
+ public string parseToString(string str) {}
+ public string parseToString(string str, uint length) {}
+ public virtual bool partial() {}
+ public virtual int request_type() {}
+ public virtual void set_all_morphs(bool all_morphs) {}
+ public virtual void set_lattice_level(int level) {}
+ public virtual void set_partial(bool @partial) {}
+ public virtual void set_request_type(int request_type) {}
+ public virtual void set_theta(float theta) {}
+ public virtual float theta() {}
+ public virtual string what() {}
+ }
+}
+
+namespace Smdn.Text.Ondulish {
+ public static class KanaUtils {
+ public static string ConvertWideHiraganaToKatakana(string input) {}
+ public static string ConvertWideKatakanaToHiragana(string input) {}
+ public static string ConvertWideKatakanaToNarrowKatakana(string input) {}
+ }
+
+ public class Translator : IDisposable {
+ public static Tagger CreateTaggerForBundledDictionary() {}
+
+ public Translator() {}
+ public Translator(Tagger tagger, bool shouldDisposeTagger) {}
+
+ public IReadOnlyDictionary<string, string> PhraseDictionary { get; }
+ public IReadOnlyDictionary<string, string> WordDictionary { get; }
+
+ protected virtual void Dispose(bool disposing) {}
+ public void Dispose() {}
+ public string Translate(string input, bool convertKatakanaToNarrow = true) {}
+ public void Translate(TextReader input, TextWriter output, bool convertKatakanaToNarrow = true) {}
+ public void Translate(string input, TextWriter output, bool convertKatakanaToNarrow = true) {}
+ }
+}
diff --git a/src/Smdn.Text.Ondulish/MeCab.targets b/src/Smdn.Text.Ondulish/MeCab.targets
new file mode 100644
index 0000000..d47b7ee
--- /dev/null
+++ b/src/Smdn.Text.Ondulish/MeCab.targets
@@ -0,0 +1,51 @@
+<!--
+SPDX-FileCopyrightText: 2022 smdn <smdn@smdn.jp>
+SPDX-License-Identifier: MIT
+-->
+<Project>
+ <PropertyGroup>
+ <BuildInParallel>false</BuildInParallel> <!-- disable parallel builds to avoid running MeCab builds simultaneously -->
+ </PropertyGroup>
+
+ <Target
+ Name="MeCabBindings_Build"
+ BeforeTargets="DispatchToInnerBuilds;BeforeBuild"
+ >
+ <Exec
+ Command="make install-buildtime-deps-ubuntu.22.04 -f install-deps.mk"
+ WorkingDirectory="$(MSBuildThisFileDirectory)..\..\eng\dependencies\"
+ Condition="
+ '$(GITHUB_ACTIONS)' == 'true' and
+ $([System.Runtime.InteropServices.RuntimeInformation]::RuntimeIdentifier.StartsWith('ubuntu.22.04'))
+ "
+ />
+
+ <!-- generate MeCab shared library and SWIG bindings -->
+ <Exec
+ Command="make mecab-bindings"
+ WorkingDirectory="$(MeCabMakefileDirectory)"
+ />
+
+ <!-- make sure the generated SWIG binding source files are included in the <Compile> items -->
+ <ItemGroup>
+ <Compile Include="$(MeCabBindingsDirectory)src\*.cs" />
+ </ItemGroup>
+
+ <!-- generate MeCab IPA dictionary -->
+ <Exec
+ Command="make mecab-ipadic"
+ WorkingDirectory="$(MeCabMakefileDirectory)"
+ />
+ </Target>
+
+ <Target
+ Name="MeCabBindings_Clean"
+ AfterTargets="Clean"
+ >
+ <!-- clean MeCab shared library and swig bindings -->
+ <Exec
+ Command="make clean-mecab-bindings"
+ WorkingDirectory="$(MeCabMakefileDirectory)"
+ />
+ </Target>
+</Project>
diff --git a/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish.csproj b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish.csproj
new file mode 100644
index 0000000..3b8c4b0
--- /dev/null
+++ b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish.csproj
@@ -0,0 +1,132 @@
+<!--
+SPDX-FileCopyrightText: 2012 smdn <smdn@smdn.jp>
+SPDX-License-Identifier: MIT
+-->
+<Project Sdk="Microsoft.NET.Sdk">
+ <PropertyGroup>
+ <TargetFrameworks>net6.0;netstandard2.1</TargetFrameworks>
+ <RuntimeIdentifiers>ubuntu.22.04-x64</RuntimeIdentifiers>
+ <VersionPrefix>4.0.0</VersionPrefix>
+ <VersionSuffix>preview1</VersionSuffix>
+ <!-- <PackageValidationBaselineVersion>4.0.0</PackageValidationBaselineVersion> -->
+ <Nullable>enable</Nullable>
+ <AssemblyCLSCompliant>false</AssemblyCLSCompliant>
+ <GenerateNupkgReadmeFileDependsOnTargets>$(GenerateNupkgReadmeFileDependsOnTargets);GenerateReadmeFileContent</GenerateNupkgReadmeFileDependsOnTargets>
+ </PropertyGroup>
+
+ <PropertyGroup>
+ <MeCabMakefileDirectory>$(MSBuildThisFileDirectory)..\MeCab\</MeCabMakefileDirectory>
+ <MeCabBindingsDirectory>$(MSBuildThisFileDirectory)..\MeCab\mecab-bindings\</MeCabBindingsDirectory>
+ <MeCabIpaDicDirectory>$(MSBuildThisFileDirectory)..\MeCab\mecab\mecab-ipadic\</MeCabIpaDicDirectory>
+ <MeCabDeploymentBasePath>mecab\</MeCabDeploymentBasePath>
+ <MeCabIpaDicDeploymentBasePath>$(MeCabDeploymentBasePath)dic\ipadic\</MeCabIpaDicDeploymentBasePath>
+ </PropertyGroup>
+
+ <PropertyGroup Label="assembly attributes">
+ <Description>A text conversion library that provides translation features from Japanese to Ondulish.</Description>
+ <CopyrightYear>2012</CopyrightYear>
+ </PropertyGroup>
+
+ <PropertyGroup Label="package properties">
+ <PackageTags>joke;funny;text-converter;translator</PackageTags>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <PackageReference Include="Smdn.Fundamental.Csv" Version="[3.1.0,4.0.0)" />
+ <PackageReference Include="Smdn.Fundamental.String.Replacement" Version="[3.0.2,4.0.0)" />
+ <ProjectReference
+ Include="$(MSBuildThisFileDirectory)..\Smdn.Text.Ondulish.Dictionaries\Smdn.Text.Ondulish.Dictionaries.csproj"
+ VersionRange="[4.0.0,5.0.0)"
+ AdditionalProperties="IsBuildDueToProjectReference=true"
+ />
+ </ItemGroup>
+
+ <ItemGroup>
+ <!-- Third party notice -->
+ <None
+ Include="$(MSBuildThisFileDirectory)..\..\ThirdPartyNotices.md"
+ Pack="true"
+ PackagePath="ThirdPartyNotices.md"
+ CopyToOutputDirectory="None"
+ />
+
+ <!-- MeCab bindings source -->
+ <!--
+ These files will be added by MeCab.targets during the MSBuild execution phase.
+ <Compile Include="$(MeCabBindingsDirectory)src\*.cs" />
+ -->
+
+ <!-- MeCab shared library -->
+ <None
+ Include="$(MeCabBindingsDirectory)runtimes\ubuntu.22.04-x64\native\libmecab.so"
+ Pack="true"
+ PackagePath="runtimes\ubuntu.22.04-x64\native\libmecab.so"
+ CopyToOutputDirectory="PreserveNewest"
+ />
+
+ <!-- MeCab IPA dictionary files -->
+ <Content Include="$(MeCabIpaDicDirectory)char.bin" />
+ <Content Include="$(MeCabIpaDicDirectory)dicrc" />
+ <Content Include="$(MeCabIpaDicDirectory)left-id.def" />
+ <Content Include="$(MeCabIpaDicDirectory)matrix.bin" />
+ <Content Include="$(MeCabIpaDicDirectory)pos-id.def" />
+ <Content Include="$(MeCabIpaDicDirectory)rewrite.def" />
+ <Content Include="$(MeCabIpaDicDirectory)right-id.def" />
+ <Content Include="$(MeCabIpaDicDirectory)sys.dic" />
+ <Content Include="$(MeCabIpaDicDirectory)unk.dic" />
+
+ <Content
+ Update="$(MeCabIpaDicDirectory)*"
+ TargetPath="$(MeCabIpaDicDeploymentBasePath)$([System.IO.Path]::GetFileName('%(Identity)'))"
+ CopyToOutputDirectory="PreserveNewest"
+ Pack="true"
+ PackagePath="contentFiles\any\any\$([System.IO.Path]::TrimEndingDirectorySeparator('$(MeCabIpaDicDeploymentBasePath)'))"
+ PackageCopyToOutput="true"
+ />
+
+ <!-- MeCab configuration file -->
+ <Content
+ Include="null.mecabrc"
+ TargetPath="$(MeCabDeploymentBasePath)%(Filename)%(Extension)"
+ CopyToOutputDirectory="PreserveNewest"
+ Pack="true"
+ PackagePath="contentFiles\any\any\$([System.IO.Path]::TrimEndingDirectorySeparator('$(MeCabDeploymentBasePath)'))"
+ PackageCopyToOutput="true"
+ />
+ </ItemGroup>
+
+ <Target Name="GenerateReadmeFileContent">
+ <ItemGroup>
+ <_SupportedRuntimeIdentifier Include="$(RuntimeIdentifiers)" />
+ <_SupportedRuntimeIdentifierMarkdownList Include="- `%(_SupportedRuntimeIdentifier.Identity)`" />
+ </ItemGroup>
+
+ <PropertyGroup>
+ <PackageReadmeFileContent><![CDATA[# $(AssemblyName)-$(InformationalVersion)
+$(Description)
+
+## Usage
+```cs
+$([System.IO.File]::ReadAllText('$(MSBuildThisFileDirectory)..\..\examples\hello-ondulish-world\Program.cs'))
+```
+
+## MeCab bindings for .NET
+This package includes [MeCab](https://github.com/taku910/mecab.git) bindings for .NET and supports the platforms represented by the following [RID](https://learn.microsoft.com/dotnet/core/rid-catalog)s.
+@(_SupportedRuntimeIdentifierMarkdownList, '%0A')
+
+For other platforms, a wrapper library for the bindings must be built and deployed separately.
+]]></PackageReadmeFileContent>
+ </PropertyGroup>
+
+ <!-- append licence notice to package readme -->
+ <PropertyGroup>
+ <ThirdPartyNoticesMarkdownText>$([System.IO.File]::ReadAllText('$(MSBuildThisFileDirectory)..\..\ThirdPartyNotices.md'))</ThirdPartyNoticesMarkdownText>
+ <PackageReadmeFileContentPostamble>$(PackageReadmeFileContentPostamble)$(ThirdPartyNoticesMarkdownText)</PackageReadmeFileContentPostamble>
+ </PropertyGroup>
+ </Target>
+
+ <ImportGroup>
+ <Import Project="$(MSBuildThisFileDirectory)MeCab.targets" />
+ </ImportGroup>
+
+</Project>
diff --git a/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/KanaUtils.cs b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/KanaUtils.cs
new file mode 100644
index 0000000..2092ef7
--- /dev/null
+++ b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/KanaUtils.cs
@@ -0,0 +1,150 @@
+// SPDX-FileCopyrightText: 2012 smdn <smdn@smdn.jp>
+// SPDX-License-Identifier: MIT
+
+using System;
+using System.Text;
+
+namespace Smdn.Text.Ondulish;
+
+/// <summary>
+/// Converts Japanese kana between full-width hiragana, full-width katakana and half-width katakana.
+/// </summary>
+public static class KanaUtils {
+  // U+3041..U+3096: full-width hiragana range handled by the hiragana/katakana conversions
+  private const char WideHiraganaStart = '\u3041';
+  private const char WideHiraganaEnd = '\u3096';
+
+  // U+30A1..U+30F6: full-width katakana range that mirrors the hiragana range above
+  private const char WideKatakanaStart = '\u30a1';
+  private const char WideKatakanaEnd = '\u30f6';
+
+  // distance between a hiragana code point and the katakana at the same position of its range
+  private const int OffsetFromHiraganaToKatakana = WideKatakanaStart - WideHiraganaStart;
+
+  // U+30FA: last full-width katakana that has an entry in WideToNarrowKatakanaMap
+  private const char WideKatakanaExEnd = '\u30fa';
+
+  // Indexed by (code point - WideKatakanaStart). Voiced and semi-voiced kana expand to the base
+  // kana followed by a half-width sound mark (U+FF9E/U+FF9F); kana that have no half-width form
+  // map to themselves.
+  // NOTE(review): the previous table mapped every kana to itself, so the conversion was a no-op;
+  // the entries for U+30F7 and U+30FA are a best guess (base kana + voiced mark) -- confirm.
+  private static readonly string[] WideToNarrowKatakanaMap = new[] {
+    "ｧ", "ｱ", "ｨ", "ｲ", "ｩ", "ｳ", "ｪ", "ｴ", "ｫ", "ｵ", "ｶ", "ｶﾞ", "ｷ", "ｷﾞ", "ｸ", // 30A1 - 30AF
+    "ｸﾞ", "ｹ", "ｹﾞ", "ｺ", "ｺﾞ", "ｻ", "ｻﾞ", "ｼ", "ｼﾞ", "ｽ", "ｽﾞ", "ｾ", "ｾﾞ", "ｿ", "ｿﾞ", "ﾀ", // 30B0 - 30BF
+    "ﾀﾞ", "ﾁ", "ﾁﾞ", "ｯ", "ﾂ", "ﾂﾞ", "ﾃ", "ﾃﾞ", "ﾄ", "ﾄﾞ", "ﾅ", "ﾆ", "ﾇ", "ﾈ", "ﾉ", "ﾊ", // 30C0 - 30CF
+    "ﾊﾞ", "ﾊﾟ", "ﾋ", "ﾋﾞ", "ﾋﾟ", "ﾌ", "ﾌﾞ", "ﾌﾟ", "ﾍ", "ﾍﾞ", "ﾍﾟ", "ﾎ", "ﾎﾞ", "ﾎﾟ", "ﾏ", "ﾐ", // 30D0 - 30DF
+    "ﾑ", "ﾒ", "ﾓ", "ｬ", "ﾔ", "ｭ", "ﾕ", "ｮ", "ﾖ", "ﾗ", "ﾘ", "ﾙ", "ﾚ", "ﾛ", "ヮ", "ﾜ", // 30E0 - 30EF
+    "ヰ", "ヱ", "ｦ", "ﾝ", "ｳﾞ", "ヵ", "ヶ", "ﾜﾞ", "ヸ", "ヹ", "ｦﾞ", // 30F0 - 30FA
+  };
+
+  /// <summary>
+  /// Replaces every full-width hiragana (U+3041..U+3096) with the corresponding full-width katakana.
+  /// </summary>
+  /// <param name="input">The string to convert.</param>
+  /// <returns>A string of the same length as <paramref name="input"/>; other characters are copied unchanged.</returns>
+  /// <exception cref="ArgumentNullException"><paramref name="input"/> is <see langword="null"/>.</exception>
+  public static string ConvertWideHiraganaToKatakana(string input)
+  {
+    if (input is null)
+      throw new ArgumentNullException(nameof(input));
+    if (input.Length == 0)
+      return string.Empty;
+
+#if SYSTEM_STRING_CREATE
+    return string.Create(input.Length, input, static (chars, s) => {
+      for (var index = 0; index < chars.Length; index++) {
+        chars[index] = s[index] is >= WideHiraganaStart and <= WideHiraganaEnd
+          ? (char)(s[index] + OffsetFromHiraganaToKatakana)
+          : s[index];
+      }
+    });
+#else
+    var outputChars = new char[input.Length];
+
+    for (var index = 0; index < input.Length; index++) {
+      outputChars[index] = input[index] is >= WideHiraganaStart and <= WideHiraganaEnd
+        ? (char)(input[index] + OffsetFromHiraganaToKatakana)
+        : input[index];
+    }
+
+    return new string(outputChars);
+#endif
+  }
+
+  /// <summary>
+  /// Replaces every full-width katakana (U+30A1..U+30F6) with the corresponding full-width hiragana.
+  /// </summary>
+  /// <param name="input">The string to convert.</param>
+  /// <returns>A string of the same length as <paramref name="input"/>; other characters are copied unchanged.</returns>
+  /// <exception cref="ArgumentNullException"><paramref name="input"/> is <see langword="null"/>.</exception>
+  public static string ConvertWideKatakanaToHiragana(string input)
+  {
+    if (input is null)
+      throw new ArgumentNullException(nameof(input));
+    if (input.Length == 0)
+      return string.Empty;
+
+#if SYSTEM_STRING_CREATE
+    return string.Create(input.Length, input, static (chars, s) => {
+      for (var index = 0; index < chars.Length; index++) {
+        chars[index] = s[index] is >= WideKatakanaStart and <= WideKatakanaEnd
+          ? (char)(s[index] - OffsetFromHiraganaToKatakana)
+          : s[index];
+      }
+    });
+#else
+    var outputChars = new char[input.Length];
+
+    for (var index = 0; index < input.Length; index++) {
+      outputChars[index] = input[index] is >= WideKatakanaStart and <= WideKatakanaEnd
+        ? (char)(input[index] - OffsetFromHiraganaToKatakana)
+        : input[index];
+    }
+
+    return new string(outputChars);
+#endif
+  }
+
+  /// <summary>
+  /// Replaces full-width katakana (U+30A1..U+30FA) and a few full-width marks with half-width or ASCII forms.
+  /// </summary>
+  /// <param name="input">The string to convert.</param>
+  /// <returns>The converted string; it can be longer than <paramref name="input"/> because voiced kana expand to two characters.</returns>
+  /// <exception cref="ArgumentNullException"><paramref name="input"/> is <see langword="null"/>.</exception>
+  public static string ConvertWideKatakanaToNarrowKatakana(string input)
+  {
+    if (input is null)
+      throw new ArgumentNullException(nameof(input));
+    if (input.Length == 0)
+      return string.Empty;
+
+    // a voiced kana expands to two characters, so reserve twice the input length
+    var output = new StringBuilder(input.Length * 2);
+
+    foreach (var c in input) {
+      if (c is >= WideKatakanaStart and <= WideKatakanaExEnd) {
+        output.Append(WideToNarrowKatakanaMap[c - WideKatakanaStart]);
+        continue;
+      }
+
+      // every arm is a char so that Append(char) is bound instead of the boxing Append(object)
+      output.Append(
+        c switch {
+          'ー' => 'ｰ',
+          '゛' => 'ﾞ',
+          '゜' => 'ﾟ',
+          '？' => '?',
+          '！' => '!',
+          '、' => '､',
+          '。' => '｡',
+          '，' => ',',
+          '．' => '.',
+          _ => c,
+        }
+      );
+    }
+
+    return output.ToString();
+  }
+}
diff --git a/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/ReadOnlyOrderedDictionary.cs b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/ReadOnlyOrderedDictionary.cs
new file mode 100644
index 0000000..6519550
--- /dev/null
+++ b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/ReadOnlyOrderedDictionary.cs
@@ -0,0 +1,81 @@
+// SPDX-FileCopyrightText: 2012 smdn <smdn@smdn.jp>
+// SPDX-License-Identifier: MIT
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Smdn.Text.Ondulish;
+
+/// <summary>
+/// A read-only dictionary view over a list of key/value pairs that enumerates them in list order.
+/// </summary>
+/// <remarks>
+/// Keys are not required to be unique and are not hashed; lookups scan the list from the start and
+/// return the first pair whose key equals the requested key, so they are O(n).
+/// </remarks>
+internal sealed class ReadOnlyOrderedDictionary<TKey, TValue> : IReadOnlyDictionary<TKey, TValue> {
+  private readonly IReadOnlyList<KeyValuePair<TKey, TValue>> dictionary;
+
+  /// <summary>Gets the value of the first pair whose key equals <paramref name="key"/>.</summary>
+  /// <exception cref="KeyNotFoundException">No pair has the specified key.</exception>
+  public TValue this[TKey key]
+    => TryGetValue(key, out var value)
+      ? value
+      : throw new KeyNotFoundException($"The given key '{key}' was not present in the dictionary.");
+
+  /// <summary>Gets the keys in list order (duplicates are preserved).</summary>
+  public IEnumerable<TKey> Keys => dictionary.Select(static pair => pair.Key);
+
+  /// <summary>Gets the values in list order.</summary>
+  public IEnumerable<TValue> Values => dictionary.Select(static pair => pair.Value);
+
+  /// <summary>Gets the number of pairs, counting duplicated keys.</summary>
+  public int Count => dictionary.Count;
+
+  /// <summary>Creates a dictionary from a sequence of tuples, preserving the sequence order.</summary>
+  /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is <see langword="null"/>.</exception>
+  public ReadOnlyOrderedDictionary(IEnumerable<(TKey Key, TValue Value)> dictionary)
+    : this(
+      (dictionary ?? throw new ArgumentNullException(nameof(dictionary)))
+        .Select(static pair => new KeyValuePair<TKey, TValue>(pair.Key, pair.Value))
+        .ToList()
+    )
+  { }
+
+  /// <summary>Wraps an existing list of pairs without copying it.</summary>
+  /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is <see langword="null"/>.</exception>
+  public ReadOnlyOrderedDictionary(IReadOnlyList<KeyValuePair<TKey, TValue>> dictionary)
+  {
+    this.dictionary = dictionary ?? throw new ArgumentNullException(nameof(dictionary));
+  }
+
+  /// <summary>Determines whether any pair has the specified key.</summary>
+  public bool ContainsKey(TKey key)
+    => TryGetValue(key, out _);
+
+  /// <summary>Enumerates the pairs in list order.</summary>
+  public IEnumerator<KeyValuePair<TKey, TValue>> GetEnumerator()
+    => dictionary.GetEnumerator();
+
+  /// <summary>Gets the value of the first pair whose key equals <paramref name="key"/>.</summary>
+  /// <returns><see langword="true"/> if a pair was found; otherwise <see langword="false"/>.</returns>
+  public bool TryGetValue(TKey key, out TValue value)
+  {
+    var comparer = EqualityComparer<TKey>.Default;
+
+    foreach (var pair in dictionary) {
+      if (comparer.Equals(pair.Key, key)) {
+        value = pair.Value;
+        return true;
+      }
+    }
+
+    value = default!;
+    return false;
+  }
+
+  IEnumerator IEnumerable.GetEnumerator()
+    => dictionary.GetEnumerator();
+}
diff --git a/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.Dictionaries.cs b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.Dictionaries.cs
new file mode 100644
index 0000000..7c82c31
--- /dev/null
+++ b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.Dictionaries.cs
@@ -0,0 +1,145 @@
+// SPDX-FileCopyrightText: 2012 smdn <smdn@smdn.jp>
+// SPDX-License-Identifier: MIT
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+using Smdn.Formats.Csv;
+
+namespace Smdn.Text.Ondulish;
+
+#pragma warning disable IDE0040
+partial class Translator {
+#pragma warning restore IDE0040
+
+  /// <summary>Gets the phrase dictionary held by this translator.</summary>
+  public IReadOnlyDictionary<string, string> PhraseDictionary { get; }
+
+  /// <summary>Gets the word dictionary held by this translator.</summary>
+  public IReadOnlyDictionary<string, string> WordDictionary { get; }
+
+  /// <summary>
+  /// Orders keys from longest to shortest, breaking ties with an ordinal comparison.
+  /// <see langword="null"/> is treated as an empty string.
+  /// </summary>
+  private sealed class WordDictionaryComparer : IComparer<string> {
+    public int Compare(string? x, string? y)
+    {
+      x ??= string.Empty;
+      y ??= string.Empty;
+
+      return x.Length == y.Length
+        ? StringComparer.Ordinal.Compare(x, y)
+        : y.Length - x.Length; // longer strings sort first
+    }
+  }
+
+  // Punctuation removed from dictionary keys: ASCII and full-width exclamation/question marks,
+  // ideographic comma and ideographic full stop.
+  private static readonly char[] dictionaryPunctuationChars = new[] { '!', '?', '！', '？', '、', '。' };
+
+  /// <summary>Loads a dictionary from UTF-8 CSV records of the form <c>entry,key,value</c>.</summary>
+  /// <remarks>
+  /// Records with fewer than three fields, or whose first field starts with <c>#</c>, are skipped.
+  /// Keys are trimmed, stripped of punctuation and have hiragana converted to katakana; when a key
+  /// occurs more than once, the last record wins.
+  /// </remarks>
+  /// <param name="stream">The stream to read the CSV records from.</param>
+  /// <returns>A list sorted by <see cref="WordDictionaryComparer"/>.</returns>
+  private static SortedList<string, string> LoadDictionary(Stream stream)
+  {
+    var dictionary = new SortedList<string, string>(new WordDictionaryComparer());
+
+    using var reader = new CsvReader(stream, Encoding.UTF8);
+
+    foreach (var entries in reader.ReadRecords()) {
+      if (entries.Count < 3)
+        continue;
+
+      var entry = entries[0].Trim();
+
+      if (entry.StartsWith('#'))
+        continue; // comment line
+
+      var key = entries[1].Trim().RemoveChars(dictionaryPunctuationChars);
+
+      dictionary[KanaUtils.ConvertWideHiraganaToKatakana(key)] = entries[2].Trim();
+    }
+
+    return dictionary;
+  }
+
+  // Sound replacements kept in declaration order; the first group is marked as highest priority.
+  private static readonly IReadOnlyDictionary<string, string> phonemeDictionary =
+    new ReadOnlyOrderedDictionary<string, string>(
+      new[] {
+        // highest priority
+        ("ル", "ドゥ"),
+        ("ム", "ヴ"),
+        ("ボー", "ポッ"),
+        ("ドー", "ドゥー"),
+        ("スナ", "スダ"),
+        ("スルナ", "ドゥルダ"),
+        ("スル", "ドゥル"),
+        ("デモ", "デロ"),
+        ("ンヤ", "ッニャ"),
+        ("ネイ", "ニッ"),
+        ("ネエ", "ニェ"),
+        ("デス", "ディス"),
+        ("ウラ", "ルラ"),
+        ("トオ", "ドーゥ"),
+        ("いじゃ", "チョナ"),
+        ("とは", "トヴァ"),
+
+        // vowels
+        ("ア", "ア゛"),
+        ("ウ", "ル"),
+        ("ヤ", "ャ"),
+
+        // fricatives
+        ("サ", "ザァ"),
+        ("ス", "ズ"),
+        ("ゼ", "デ"),
+
+        ("ハ", "ヴァ"),
+        ("ヒ", "ビィ"),
+        ("フ", "ヴ"),
+        ("ヘ", "ベ"),
+        ("ホ", "ボ"),
+
+        ("ブ", "ム"),
+
+        ("ゼ", "デ"), // NOTE(review): repeats the entry listed under fricatives
+
+        // plosives
+        ("ク", "グ"),
+        ("キ", "ク"),
+
+        ("タ", "ダ"),
+        ("チ", "ディ"),
+        ("ツ", "ヅ"),
+        ("テ", "デ"),
+        ("ト", "ドゥ"),
+
+        ("ピ", "ヴィ"),
+
+        // nasals
+        ("ニ", "ディ"),
+        ("ヌ", "ズ"),
+        ("ネ", "ベ"),
+        ("ノ", "ド"),
+
+        ("マ", "バ"),
+        ("ミ", "ヴィ"),
+        ("メ", "ベ"),
+        ("モ", "ボ"),
+
+        // liquids
+        ("リ", "ディ"),
+        ("レ", "リ"),
+        ("ロ", "ド"),
+      }
+    );
+}
diff --git a/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.cs b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.cs
new file mode 100644
index 0000000..6f87100
--- /dev/null
+++ b/src/Smdn.Text.Ondulish/Smdn.Text.Ondulish/Translator.cs
@@ -0,0 +1,285 @@
+// SPDX-FileCopyrightText: 2012 smdn <smdn@smdn.jp>
+// SPDX-License-Identifier: MIT
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Reflection;
+using System.Text;
+
+using MeCab;
+
+using MeCabConsts = MeCab.MeCab;
+
+namespace Smdn.Text.Ondulish;
+
+public partial class Translator : IDisposable {
+ private const string MeCabDeploymentDirectory = "mecab";
+ private const bool ConvertKatakanaToNarrowDefaultValue = true;
+
+ private Tagger? tagger;
+ private readonly bool shouldDisposeTagger;
+
+ /// <summary>
+ /// Throws <see cref="ObjectDisposedException"/> when the tagger reference has already
+ /// been cleared (which is what <c>Dispose</c> does).
+ /// </summary>
+ private void ThrowIfDisposed()
+   => _ = tagger ?? throw new ObjectDisposedException(GetType().FullName);
+
+ /// <summary>
+ /// Creates a <c>Tagger</c> configured with the <c>null.mecabrc</c> resource file and the
+ /// <c>dic/ipadic</c> dictionary directory found under the <c>mecab</c> directory next to
+ /// this assembly.
+ /// </summary>
+ /// <returns>A new tagger; the caller owns it.</returns>
+ public static Tagger CreateTaggerForBundledDictionary()
+ {
+   var baseDirectory = System.IO.Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
+
+   string deploymentDirectory;
+
+   if (string.IsNullOrEmpty(baseDirectory)) {
+     // fallback: use relative path from current directory
+     deploymentDirectory = MeCabDeploymentDirectory;
+   }
+   else {
+     deploymentDirectory = System.IO.Path.Join(baseDirectory, MeCabDeploymentDirectory);
+   }
+
+   var resourceFilePath = System.IO.Path.Join(deploymentDirectory, "null.mecabrc");
+   var dictionaryDirectoryPath = System.IO.Path.Join(deploymentDirectory, "dic", "ipadic");
+
+   return new Tagger("-r " + resourceFilePath + " -d " + dictionaryDirectoryPath);
+ }
+
+ /// <summary>
+ /// Initializes a translator with a tagger created by
+ /// <see cref="CreateTaggerForBundledDictionary"/>; that tagger is disposed together
+ /// with this instance.
+ /// </summary>
+ public Translator()
+   : this(tagger: CreateTaggerForBundledDictionary(), shouldDisposeTagger: true)
+ {
+ }
+
+ /// <summary>
+ /// Initializes a translator that uses the given tagger and loads the phrase and word
+ /// dictionaries.
+ /// </summary>
+ /// <param name="tagger">The MeCab tagger used for morphological analysis.</param>
+ /// <param name="shouldDisposeTagger">
+ /// <see langword="true"/> to dispose <paramref name="tagger"/> when this instance is disposed.
+ /// </param>
+ /// <exception cref="ArgumentNullException"><paramref name="tagger"/> is <see langword="null"/>.</exception>
+ public Translator(
+   Tagger tagger,
+   bool shouldDisposeTagger
+ )
+ {
+   this.tagger = tagger ?? throw new ArgumentNullException(nameof(tagger));
+   this.shouldDisposeTagger = shouldDisposeTagger;
+
+   // load Ondulish dictionaries from assembly Smdn.Text.Ondulish.Dictionaries
+   PhraseDictionary = LoadDictionaryOrEmpty(static () => OndulishDictionaries.OpenPhraseDictionaryStream());
+   WordDictionary = LoadDictionaryOrEmpty(static () => OndulishDictionaries.OpenWordDictionaryStream());
+
+   // Best effort: any failure while opening or parsing a dictionary is ignored
+   // and an empty dictionary is used in its place.
+   static IReadOnlyDictionary<string, string> LoadDictionaryOrEmpty(Func<Stream> openStream)
+   {
+     try {
+       using var stream = openStream();
+
+       return LoadDictionary(stream);
+     }
+     catch {
+       // ignore exceptions
+       return new Dictionary<string, string>();
+     }
+   }
+ }
+
+ /// <summary>
+ /// Releases this instance via <c>Dispose(true)</c> and suppresses finalization.
+ /// </summary>
+ public void Dispose()
+ {
+   Dispose(true);
+   GC.SuppressFinalize(this);
+ }
+
+ /// <summary>
+ /// Releases the tagger (when this instance owns it) and marks the instance as disposed.
+ /// </summary>
+ /// <param name="disposing">
+ /// <see langword="true"/> when called from <see cref="Dispose()"/>;
+ /// <see langword="false"/> when called from a finalizer.
+ /// </param>
+ protected virtual void Dispose(bool disposing)
+ {
+   // The tagger is a managed, finalizable object: only touch it on the explicit
+   // Dispose() path. On a finalizer path it is left to its own finalizer.
+   if (disposing && shouldDisposeTagger)
+     tagger?.Dispose();
+
+   // Clearing the reference is what ThrowIfDisposed checks for.
+   tagger = null;
+ }
+
+ /// <summary>Translates a string and returns the result as a new string.</summary>
+ /// <param name="input">The text to translate.</param>
+ /// <param name="convertKatakanaToNarrow">
+ /// <see langword="true"/> to pass the converted text through
+ /// <c>KanaUtils.ConvertWideKatakanaToNarrowKatakana</c>.
+ /// </param>
+ /// <returns>The translated text; an empty string when <paramref name="input"/> is empty.</returns>
+ /// <exception cref="ArgumentNullException"><paramref name="input"/> is <see langword="null"/>.</exception>
+ /// <exception cref="ObjectDisposedException">This instance has been disposed.</exception>
+ public string Translate(
+   string input,
+   bool convertKatakanaToNarrow = ConvertKatakanaToNarrowDefaultValue
+ )
+ {
+   _ = input ?? throw new ArgumentNullException(nameof(input));
+
+   // checked before the empty-input shortcut so a disposed instance always throws
+   ThrowIfDisposed();
+
+   if (input.Length == 0)
+     return string.Empty;
+
+   var writer = new StringWriter(new StringBuilder(input.Length * 2));
+
+   Translate(
+     input: new StringReader(input),
+     output: writer,
+     convertKatakanaToNarrow: convertKatakanaToNarrow
+   );
+
+   return writer.ToString();
+ }
+
+ /// <summary>Translates a string and writes the result to a writer.</summary>
+ /// <param name="input">The text to translate.</param>
+ /// <param name="output">The writer that receives the translated text.</param>
+ /// <param name="convertKatakanaToNarrow">
+ /// <see langword="true"/> to pass the converted text through
+ /// <c>KanaUtils.ConvertWideKatakanaToNarrowKatakana</c>.
+ /// </param>
+ /// <exception cref="ArgumentNullException">
+ /// <paramref name="input"/> or <paramref name="output"/> is <see langword="null"/>.
+ /// </exception>
+ public void Translate(
+   string input,
+   TextWriter output,
+   bool convertKatakanaToNarrow = ConvertKatakanaToNarrowDefaultValue
+ )
+ {
+   if (input is null)
+     throw new ArgumentNullException(nameof(input));
+   if (output is null)
+     throw new ArgumentNullException(nameof(output));
+
+   Translate(
+     input: new StringReader(input),
+     output: output,
+     convertKatakanaToNarrow: convertKatakanaToNarrow
+   );
+ }
+
+ /// <summary>
+ /// Translates text line by line from a reader and writes the result to a writer.
+ /// </summary>
+ /// <remarks>
+ /// Each non-blank line is converted to katakana by the tagger and then run through the
+ /// phrase, word and phoneme dictionaries in that order; each later stage only sees the
+ /// fragments the earlier stages left unconverted. Lines are separated with
+ /// <c>output.WriteLine()</c>, so no line terminator follows the last line. The writer is
+ /// flushed before returning.
+ /// </remarks>
+ /// <param name="input">The reader that supplies the text to translate.</param>
+ /// <param name="output">The writer that receives the translated text.</param>
+ /// <param name="convertKatakanaToNarrow">
+ /// <see langword="true"/> to pass the converted text through
+ /// <c>KanaUtils.ConvertWideKatakanaToNarrowKatakana</c>.
+ /// </param>
+ /// <exception cref="ArgumentNullException">
+ /// <paramref name="input"/> or <paramref name="output"/> is <see langword="null"/>.
+ /// </exception>
+ /// <exception cref="ObjectDisposedException">This instance has been disposed.</exception>
+ public void Translate(
+   TextReader input,
+   TextWriter output,
+   bool convertKatakanaToNarrow = ConvertKatakanaToNarrowDefaultValue
+ )
+ {
+   _ = input ?? throw new ArgumentNullException(nameof(input));
+   _ = output ?? throw new ArgumentNullException(nameof(output));
+
+   ThrowIfDisposed();
+
+   var isFirstLine = true;
+   string? line;
+
+   while ((line = input.ReadLine()) is not null) {
+     // a separator goes before every line except the first
+     if (!isFirstLine)
+       output.WriteLine();
+
+     isFirstLine = false;
+
+     // blank lines are passed through untouched
+     if (string.IsNullOrWhiteSpace(line)) {
+       output.Write(line);
+       continue;
+     }
+
+     var fragments = ConvertWithDictionary(ConvertToKatakana(line), PhraseDictionary);
+
+     fragments = ConvertRemaining(fragments, WordDictionary);
+     fragments = ConvertRemaining(fragments, phonemeDictionary);
+
+     foreach (var fragment in fragments) {
+       // whatever no dictionary converted is still normalized to katakana
+       var text = fragment.ConvertedText ?? KanaUtils.ConvertWideHiraganaToKatakana(fragment.SourceText);
+
+       if (convertKatakanaToNarrow && text is not null)
+         text = KanaUtils.ConvertWideKatakanaToNarrowKatakana(text);
+
+       output.Write(text);
+     }
+   }
+
+   output.Flush();
+
+   // Runs the still-unconverted fragments through the dictionary; fragments that
+   // already have a conversion are passed along unchanged.
+   static IEnumerable<TextFragment> ConvertRemaining(
+     IEnumerable<TextFragment> source,
+     IReadOnlyDictionary<string, string> dictionary
+   )
+   {
+     foreach (var fragment in source) {
+       if (fragment.ConvertedText is not null) {
+         yield return fragment;
+         continue;
+       }
+
+       foreach (var converted in ConvertWithDictionary(fragment.SourceText, dictionary))
+         yield return converted;
+     }
+   }
+ }
+
+ // Separator of the fields in a node's feature string.
+ private static readonly char[] featureSplitter = new[] { ',' };
+
+ /// <summary>
+ /// Runs the input through the tagger and concatenates, for each node, either the
+ /// eighth feature field or (when the node has fewer than eight fields) its surface.
+ /// </summary>
+ /// <remarks>
+ /// The eighth field is presumably the katakana reading in the ipadic feature layout,
+ /// and the seventh the base form — TODO confirm against the dictionary in use.
+ /// </remarks>
+ /// <param name="input">A single line of text.</param>
+ /// <returns>The concatenated readings/surfaces of all nodes except BOS/EOS.</returns>
+ private string ConvertToKatakana(string input)
+ {
+   // XXX: feature splitter
+   // ',' is the character node.feature is split on, so ASCII commas in the input are
+   // replaced with FULLWIDTH COMMA first. The replacement is written as an escape so
+   // it cannot be folded back into ',' (which had turned this call into a no-op).
+   input = input.Replace(",", "\uFF0C");
+
+   var ret = new StringBuilder(input.Length * 2);
+
+   for (var node = tagger!.parseToNode(input); node != null; node = node.next) {
+     if (node.stat == MeCabConsts.MECAB_BOS_NODE || node.stat == MeCabConsts.MECAB_EOS_NODE)
+       continue;
+
+     var featureEntries = node.feature.Split(featureSplitter);
+
+     if (featureEntries.Length < 8) {
+       // not enough fields to pick one from: keep the text as it was
+       ret.Append(node.surface);
+       continue;
+     }
+
+     ret.Append(
+       featureEntries[6] switch {
+         "ぶっ殺す" => "ブッコロス", // ipadic says 'ぶっとばす'
+         _ => featureEntries[7],
+       }
+     );
+   }
+
+   return ret.ToString();
+ }
+
+ /// <summary>
+ /// A piece of source text paired with its converted form;
+ /// <see cref="ConvertedText"/> is <see langword="null"/> while no conversion has been applied.
+ /// </summary>
+ private readonly struct TextFragment {
+   public readonly string SourceText;
+   public readonly string? ConvertedText;
+
+   public TextFragment(string sourceText, string? convertedText)
+     => (SourceText, ConvertedText) = (sourceText, convertedText);
+ }
+
+ /// <summary>
+ /// Finds the dictionary entry whose key occurs earliest in <paramref name="input"/>
+ /// at or after <paramref name="startIndex"/> (ordinal comparison).
+ /// </summary>
+ /// <remarks>
+ /// When several keys match at the same position the entry enumerated first is kept, so
+ /// "longest" holds only if the dictionary enumerates longer keys before shorter ones
+ /// (presumably what the comparer used by the loaded dictionaries arranges — TODO confirm).
+ /// Entries with an empty key are ignored: they would match at every index with length 0
+ /// and the caller could never advance past them.
+ /// </remarks>
+ /// <param name="input">The text to search.</param>
+ /// <param name="startIndex">The index at which the search starts.</param>
+ /// <param name="dictionary">The entries whose keys are searched for.</param>
+ /// <param name="position">
+ /// The index of the match, or <see cref="int.MaxValue"/> when nothing matched.
+ /// </param>
+ /// <param name="candidate">The matching entry, or the default value when nothing matched.</param>
+ /// <returns><see langword="true"/> if any key was found.</returns>
+ private static bool FindMostLeftAndLongestCandidate(
+   string input,
+   int startIndex,
+   IReadOnlyDictionary<string, string> dictionary,
+   out int position,
+   out KeyValuePair<string, string> candidate
+ )
+ {
+   position = int.MaxValue;
+   candidate = default;
+
+   foreach (var entry in dictionary) {
+     if (string.IsNullOrEmpty(entry.Key))
+       continue;
+
+     var pos = input.IndexOf(entry.Key, startIndex, StringComparison.Ordinal);
+
+     // not found, or not strictly left of the best match so far
+     if (pos < 0 || position <= pos)
+       continue;
+
+     position = pos;
+     candidate = entry;
+
+     // nothing can match further left than the start index: stop scanning
+     if (position == startIndex)
+       break;
+   }
+
+   return position != int.MaxValue;
+ }
+
+ /// <summary>
+ /// Splits <paramref name="input"/> into fragments: each dictionary match becomes a
+ /// fragment carrying the entry's value, and the text between matches becomes a fragment
+ /// without a conversion.
+ /// </summary>
+ /// <remarks>
+ /// The remainder after the last match is always yielded as a final unconverted
+ /// fragment, even when it is empty.
+ /// </remarks>
+ /// <param name="input">The text to convert.</param>
+ /// <param name="dictionary">The entries to look for.</param>
+ /// <returns>The fragments in input order (lazily evaluated).</returns>
+ private static IEnumerable<TextFragment> ConvertWithDictionary(
+   string input,
+   IReadOnlyDictionary<string, string> dictionary
+ )
+ {
+   var consumed = 0;
+
+   for (;;) {
+     if (!FindMostLeftAndLongestCandidate(input, consumed, dictionary, out var matchIndex, out var match))
+       break;
+
+     var gapLength = matchIndex - consumed;
+
+     // text skipped over before the match stays unconverted
+     if (0 < gapLength)
+       yield return new TextFragment(input.Substring(consumed, gapLength), null);
+
+     yield return new TextFragment(match.Key, match.Value);
+
+     consumed = matchIndex + match.Key.Length;
+   }
+
+   yield return new TextFragment(input.Substring(consumed), null);
+ }
+}
diff --git a/src/Smdn.Text.Ondulish/null.mecabrc b/src/Smdn.Text.Ondulish/null.mecabrc
new file mode 100644
index 0000000..e69de29
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment