Skip to content

Instantly share code, notes, and snippets.

@sebgod
Last active August 6, 2016 02:40
Show Gist options
  • Save sebgod/de276427514b9e57e951 to your computer and use it in GitHub Desktop.
Save sebgod/de276427514b9e57e951 to your computer and use it in GitHub Desktop.
Testing pure Win32 console output vs io.print/write
@setlocal enabledelayedexpansion
@chcp 850 2>nul 1>nul
@set JAVA_TOOL_OPTIONS=-Dfile.encoding=UTF8
@if "%~1" EQU "" @(
set grades=mercury --libgrades-exclude stseg --libgrades-exclude trseg
set grades=!grades! --libgrades-exclude pregen --libgrades-exclude memprof
set grades=!grades! --output-libgrades
) else (
set grades=echo %*
)
@for /F "usebackq" %%G in (`%grades%`) do @(
echo Building for %%G
for %%C in (%PATHEXT%) do @if exist win_unicode_%%G%%C @del win_unicode_%%G%%C
call mercury --use-grade-subdirs --grade %%G --target-env-type windows -m win_unicode 2>&1
if errorlevel 1 (
echo Failed to build for grade %%G
) else (
for %%C in (%PATHEXT%) do @(
if exist win_unicode%%C ren win_unicode%%C win_unicode_%%G%%C
)
)
)
@for /F "usebackq" %%G in (`%grades%`) do @(
for %%C in (%PATHEXT%) do @(
if exist win_unicode_%%G%%C call win_unicode_%%G%%C
)
)
#!/bin/sh
if [ $# -gt 0 ] ; then
GRADES="$*"
else
GRADES=`mmc --output-libgrades`
fi
for G in $GRADES
do
echo Building for $G
if [ -x win_unicode_${G} ]
then
rm win_unicode_${G}
fi
mmc -r --use-grade-subdirs --grade ${G} -m win_unicode && mv win_unicode win_unicode_${G}
done
for G in $GRADES
do
if [ -x win_unicode_${G} ]
then
./win_unicode_${G}
fi
done
%----------------------------------------------------------------------------%
% vim: ft=mercury ff=unix ts=4 sw=4 et
%----------------------------------------------------------------------------%
% File: win_unicode.m
% Copyright © 2014 Sebastian Godelet
% Main author: Sebastian Godelet <sebastian.godelet+github@gmail.com>
% Created on: Sat 14 Jun 15:25:52 CEST 2014
% Stability: medium-low
%----------------------------------------------------------------------------%
% A minimal working example for demonstrating different behaviour between
% the native stdio fprintf function and io.write_string in respect to
% MS Windows broken Unicode handling in the console runtime.
%----------------------------------------------------------------------------%
:- module win_unicode.
:- interface.
:- import_module io.
%----------------------------------------------------------------------------%
:- pred main(io::di, io::uo) is det.
%----------------------------------------------------------------------------%
%----------------------------------------------------------------------------%
:- implementation.
:- import_module bool.
:- import_module int.
:- import_module list.
:- import_module pair.
:- import_module require.
:- import_module stream.
:- import_module string.
:- import_module univ.
%----------------------------------------------------------------------------%
:- pred stdprint(string::in, io::di, io::uo) is det.
:- type encoding.
:- pred encoding_equal(encoding::in, encoding::in) is semidet.
:- pred get_output_encoding(encoding::out, io::di, io::uo) is det.
:- pred set_output_encoding(encoding::in, bool::out, io::di, io::uo) is det.
:- func string_encoding = encoding.
:- func utf8_encoding = encoding.
:- func encoding_name(encoding) = string.
%----------------------------------------------------------------------------%
%
% stdprint, printing to standard output using the native print function:
%
% * C calling fprintf with stdout as the FILE*
% * C# calling System.Console.Write
% * Java calling System.out.print
%
:- pragma foreign_decl("C", "#include <stdio.h>"). % for fprintf()
:- pragma foreign_proc("C", stdprint(Text::in, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure],
"
fprintf(stdout, ""%s"", Text);
").
%----------------------------------------------------------------------------%
:- pragma foreign_proc("C#", stdprint(Text::in, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure],
"
System.Console.Write(Text);
").
%----------------------------------------------------------------------------%
:- pragma foreign_proc("Java", stdprint(Text::in, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure],
"
System.out.print(Text);
").
%----------------------------------------------------------------------------%
%
% set_output_encoding succeeds if the current encoding is equal to the
% desired encoding.
%
% It delegates the actual work to `set_output_encoding_priv/3', which _should_
% be specified by a grade- or implementation-specific predicate.
%
% By default, it fails semi-deterministically, since there is no platform-
% independant way to solve this.
%
set_output_encoding(Encoding, Success, !IO) :-
get_output_encoding(Encoding0, !IO),
( Encoding0 = Encoding ->
Success = yes
;
set_output_encoding_priv(Encoding, Success, !IO)
).
:- pred set_output_encoding_priv(encoding::in, bool::out, io::di, io::uo)
is det.
set_output_encoding_priv(_, no, !IO).
%----------------------------------------------------------------------------%
%
% Implemtation of encodings in C for Windows and Posix
%
% On Windows Codepages are represented by integers, using the CP_XXX constants
% defined in the WinAPI. We will use this for Posix as well, to simplify the
% implementation
%
% CP_UTF8 = 65001
% CP_UTF16 = 1200
%
:- pragma foreign_decl("C",
"
#ifndef MR_WIN32
#include <locale.h> /* for setlocale() */
#include <string.h> /* for strlen() */
#endif
#ifndef CP_UTF8
#define CP_UTF8 65001
#endif
#ifndef CP_UTF16
#define CP_UTF16 1200
#endif
#ifndef CP_UNDEFINED
#define CP_UNDEFINED -1
#endif
").
:- pragma foreign_type("C", encoding, "int")
where equality is encoding_equal.
:- pragma foreign_proc("C",
encoding_equal(EncodingA::in, EncodingB::in),
[will_not_call_mercury, promise_pure],
"
SUCCESS_INDICATOR = (EncodingA == EncodingB) ? MR_TRUE : MR_FALSE;
").
:- pragma foreign_proc("C",
encoding_name(Encoding::in) = (Name::out),
[will_not_call_mercury, promise_pure],
"
switch (Encoding) {
case CP_UTF8: Name = (MR_String)""UTF-8""; break;
case CP_UTF16: Name = (MR_String)""UTF-16""; break;
default: Name = (MR_String)""<unknown>""; break;
}
").
:- pragma foreign_proc("C",
string_encoding = (Encoding::out),
[will_not_call_mercury, promise_pure],
"
Encoding = CP_UTF8;
").
:- pragma foreign_proc("C",
utf8_encoding = (Encoding::out),
[will_not_call_mercury, promise_pure],
"
Encoding = CP_UTF8;
").
:- pragma foreign_proc("C",
get_output_encoding(Encoding::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure],
"
#ifdef MR_WIN32
Encoding = GetConsoleOutputCP();
#else
size_t size;
char *locale;
locale = setlocale(LC_ALL, """");
size = strlen(locale);
if (size >= 5 &&
locale[size-5] == 'U' &&
locale[size-4] == 'T' &&
locale[size-3] == 'F' &&
locale[size-2] == '-' &&
locale[size-1] == '8')
{
Encoding = CP_UTF8;
} else {
Encoding = CP_UNDEFINED;
}
#endif
").
:- pragma foreign_proc("C",
set_output_encoding_priv(Encoding::in, Success::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure],
"
#ifdef MR_WIN32
Success = SetConsoleOutputCP(Encoding) ? MR_TRUE : MR_FALSE;
#else
Success = MR_FALSE;
#endif
").
%----------------------------------------------------------------------------%
%
% Implemtation of encodings in C#.
%
% Essentially, the Microsoft .NET implementation just wraps the WinAPI
% functions (AFAIK)
%
:- pragma foreign_type("C#", encoding, "System.Text.Encoding")
where equality is encoding_equal.
:- pragma foreign_proc("C#",
encoding_equal(EndcodingA::in, EncodingB::in),
[will_not_call_mercury, promise_pure],
"
SUCCESS_INDICATOR = !object.ReferenceEquals(EndcodingA, null)
&& EndcodingA.Equals(EncodingB);
").
:- pragma foreign_proc("C#",
encoding_name(Encoding::in) = (Name::out),
[will_not_call_mercury, promise_pure],
"
Name = Encoding.WebName.ToUpperInvariant();
").
:- pragma foreign_proc("C#",
string_encoding = (Encoding::out),
[will_not_call_mercury, promise_pure],
"
bool bigEndian = !System.BitConverter.IsLittleEndian;
Encoding = new System.Text.UnicodeEncoding(bigEndian, false);
").
:- pragma foreign_proc("C#",
utf8_encoding = (Encoding::out),
[will_not_call_mercury, promise_pure],
"
Encoding = new System.Text.UTF8Encoding(false);
").
:- pragma foreign_proc("C#",
get_output_encoding(Encoding::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure],
"
Encoding = System.Console.OutputEncoding;
").
:- pragma foreign_proc("C#",
set_output_encoding_priv(Encoding::in, Success::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure],
"
try {
System.Console.OutputEncoding = Encoding;
Success = mr_bool.YES;
} catch {
Success = mr_bool.NO;
}
").
%----------------------------------------------------------------------------%
%
% Implemtation of encodings in Java.
%
% Using java.nio.charset.Charset
%
% TODO: This is still incomplete
%
:- pragma foreign_type("Java", encoding, "java.nio.charset.Charset")
where equality is encoding_equal.
:- pragma foreign_proc("Java",
encoding_equal(EncodingA::in, EncodingB::in),
[will_not_call_mercury, promise_pure],
"
SUCCESS_INDICATOR = EncodingA != null &&
EncodingA.equals(EncodingB);
").
:- pragma foreign_proc("Java",
encoding_name(Encoding::in) = (Name::out),
[will_not_call_mercury, promise_pure],
"
Name = Encoding.name();
").
:- pragma foreign_proc("Java",
string_encoding = (Encoding::out),
[will_not_call_mercury, promise_pure],
"
Encoding = java.nio.charset.Charset.forName(""UTF-16"");
").
:- pragma foreign_proc("Java",
utf8_encoding = (Encoding::out),
[will_not_call_mercury, promise_pure],
"
Encoding = java.nio.charset.Charset.forName(""UTF-8"");
").
:- pragma foreign_proc("Java",
get_output_encoding(Encoding::out, _IO0::di, _IO::uo),
[will_not_call_mercury, promise_pure],
"
Encoding = java.nio.charset.Charset.forName(
System.getProperty(""file.encoding""));
").
%----------------------------------------------------------------------------%
main(!IO) :-
set_output_encoding(utf8_encoding, IsUtf8Encoding, !IO),
Writers = [
"io.write_string"-io.write_string,
"native printing function"-stdprint,
"foldl(io.write_char)"-write_chars
],
( IsUtf8Encoding = yes ->
do_test(Writers, !IO)
;
unexpected($file, $pred, $grade ++ " does not support UTF-8")
),
( string_encoding = utf8_encoding ->
true
;
set_output_encoding(string_encoding, IsStringEncoding, !IO),
( IsStringEncoding = yes ->
do_test(Writers, !IO)
;
unexpected($file, $pred,
$grade ++ " does not support " ++
encoding_name(string_encoding))
)
).
%----------------------------------------------------------------------------%
%
% The `writer' predicate encapsulates the different I/O implementations for
% demonstrating different encoding behaviour on the OS standard output.
%
:- type writer_pred == pred(string, io, io).
:- inst writer_pred == (pred(in, di, uo) is det).
%----------------------------------------------------------------------------%
%
% `write_chars' folds over a string using io.write_char.
%
:- pred write_chars(string::in, io::di, io::uo) is det.
write_chars(Text, !IO) :-
list.foldl(io.write_char, to_char_list(Text), !IO).
%----------------------------------------------------------------------------%
%
% `do_test/3' uses the list of writers to iteratively test the string obtained
% from `func unicode_text/0'.
%
:- pred do_test(list(pair(string, writer_pred)), io, io).
:- mode do_test(in(list(pair(ground, writer_pred))), di, uo) is det.
do_test([], !IO).
do_test([Name-Writer | Writers], !IO) :-
Writer("\n\n", !IO),
Writer("Executing test ", !IO),
Writer(Name, !IO),
Writer(" in: ", !IO),
Writer($grade, !IO),
Writer(" codepage: ", !IO),
get_output_encoding(Encoding, !IO),
Writer(encoding_name(Encoding), !IO),
Writer("\n", !IO),
Writer(unicode_text, !IO),
do_test(Writers, !IO).
:- func unicode_text = string.
unicode_text =
"English: texts, web pages and documents
Graves,etc: à á â ã ä å æ ç è é ê ë ì í î ï
Greek: ΐ Α Β Γ Δ Ε Ζ Η Θ Ι Κ Λ Μ Ν Ξ Ο
Romanian: texte, pagini Web şi a documentelor
Vietnamese: văn bản, các trang web và các tài liệu
Russian: тексты, веб-страницы и документы
Japanese: テキスト、Webページや文書
Korean: 텍스트, 웹 페이지 및 문서
Chinese: 文本,網頁和文件
".
:- initialise init/2.
:- pred init(io::di, io::uo) is det.
init(!IO) :-
get_output_encoding(OldEncoding, !IO),
type_to_univ(OldEncoding, Globals),
set_globals(Globals, !IO).
:- finalise fin/2.
:- pred fin(io::di, io::uo) is det.
fin(!IO) :-
get_globals(Globals, !IO),
( univ_to_type(Globals, OldEncoding) ->
set_output_encoding(OldEncoding, _, !IO)
;
unexpected($file, $pred, "Cannot cast univ Globals to encoding")
).
%----------------------------------------------------------------------------%
:- end_module win_unicode.
%----------------------------------------------------------------------------%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment