Last active
August 6, 2016 02:40
-
-
Save sebgod/de276427514b9e57e951 to your computer and use it in GitHub Desktop.
Testing pure Win32 console output vs io.print/write
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@setlocal enabledelayedexpansion | |
@chcp 850 2>nul 1>nul | |
@set JAVA_TOOL_OPTIONS=-Dfile.encoding=UTF8 | |
@if "%~1" EQU "" @( | |
set grades=mercury --libgrades-exclude stseg --libgrades-exclude trseg | |
set grades=!grades! --libgrades-exclude pregen --libgrades-exclude memprof | |
set grades=!grades! --output-libgrades | |
) else ( | |
set grades=echo %* | |
) | |
@for /F "usebackq" %%G in (`%grades%`) do @( | |
echo Building for %%G | |
for %%C in (%PATHEXT%) do @if exist win_unicode_%%G%%C @del win_unicode_%%G%%C | |
call mercury --use-grade-subdirs --grade %%G --target-env-type windows -m win_unicode 2>&1 | |
if errorlevel 1 ( | |
echo Failed to build for grade %%G | |
) else ( | |
for %%C in (%PATHEXT%) do @( | |
if exist win_unicode%%C ren win_unicode%%C win_unicode_%%G%%C | |
) | |
) | |
) | |
@for /F "usebackq" %%G in (`%grades%`) do @( | |
for %%C in (%PATHEXT%) do @( | |
if exist win_unicode_%%G%%C call win_unicode_%%G%%C | |
) | |
) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
if [ $# -gt 0 ] ; then | |
GRADES="$*" | |
else | |
GRADES=`mmc --output-libgrades` | |
fi | |
for G in $GRADES | |
do | |
echo Building for $G | |
if [ -x win_unicode_${G} ] | |
then | |
rm win_unicode_${G} | |
fi | |
mmc -r --use-grade-subdirs --grade ${G} -m win_unicode && mv win_unicode win_unicode_${G} | |
done | |
for G in $GRADES | |
do | |
if [ -x win_unicode_${G} ] | |
then | |
./win_unicode_${G} | |
fi | |
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%----------------------------------------------------------------------------% | |
% vim: ft=mercury ff=unix ts=4 sw=4 et | |
%----------------------------------------------------------------------------% | |
% File: win_unicode.m | |
% Copyright © 2014 Sebastian Godelet | |
% Main author: Sebastian Godelet <sebastian.godelet+github@gmail.com> | |
% Created on: Sat 14 Jun 15:25:52 CEST 2014 | |
% Stability: medium-low | |
%----------------------------------------------------------------------------% | |
% A minimal working example for demonstrating different behaviour between | |
% the native stdio fprintf function and io.write_string in respect to | |
% MS Windows broken Unicode handling in the console runtime. | |
%----------------------------------------------------------------------------% | |
:- module win_unicode. | |
:- interface. | |
:- import_module io. | |
%----------------------------------------------------------------------------% | |
:- pred main(io::di, io::uo) is det. | |
%----------------------------------------------------------------------------% | |
%----------------------------------------------------------------------------% | |
:- implementation. | |
:- import_module bool. | |
:- import_module int. | |
:- import_module list. | |
:- import_module pair. | |
:- import_module require. | |
:- import_module stream. | |
:- import_module string. | |
:- import_module univ. | |
%----------------------------------------------------------------------------% | |
:- pred stdprint(string::in, io::di, io::uo) is det. | |
:- type encoding. | |
:- pred encoding_equal(encoding::in, encoding::in) is semidet. | |
:- pred get_output_encoding(encoding::out, io::di, io::uo) is det. | |
:- pred set_output_encoding(encoding::in, bool::out, io::di, io::uo) is det. | |
:- func string_encoding = encoding. | |
:- func utf8_encoding = encoding. | |
:- func encoding_name(encoding) = string. | |
%----------------------------------------------------------------------------% | |
% | |
% stdprint, printing to standard output using the native print function: | |
% | |
% * C calling fprintf with stdout as the FILE* | |
% * C# calling System.Console.Write | |
% * Java calling System.out.print | |
% | |
:- pragma foreign_decl("C", "#include <stdio.h>"). % for fprintf() | |
:- pragma foreign_proc("C", stdprint(Text::in, _IO0::di, _IO::uo), | |
[will_not_call_mercury, promise_pure], | |
" | |
fprintf(stdout, ""%s"", Text); | |
"). | |
%----------------------------------------------------------------------------% | |
:- pragma foreign_proc("C#", stdprint(Text::in, _IO0::di, _IO::uo), | |
[will_not_call_mercury, promise_pure], | |
" | |
System.Console.Write(Text); | |
"). | |
%----------------------------------------------------------------------------% | |
:- pragma foreign_proc("Java", stdprint(Text::in, _IO0::di, _IO::uo), | |
[will_not_call_mercury, promise_pure], | |
" | |
System.out.print(Text); | |
"). | |
%----------------------------------------------------------------------------% | |
% | |
% set_output_encoding succeeds if the current encoding is equal to the | |
% desired encoding. | |
% | |
% It delegates the actual work to `set_output_encoding_priv/3', which _should_ | |
% be specified by a grade- or implementation-specific predicate. | |
% | |
% By default, it fails semi-deterministically, since there is no platform- | |
% independant way to solve this. | |
% | |
set_output_encoding(Encoding, Success, !IO) :- | |
get_output_encoding(Encoding0, !IO), | |
( Encoding0 = Encoding -> | |
Success = yes | |
; | |
set_output_encoding_priv(Encoding, Success, !IO) | |
). | |
:- pred set_output_encoding_priv(encoding::in, bool::out, io::di, io::uo) | |
is det. | |
set_output_encoding_priv(_, no, !IO). | |
%----------------------------------------------------------------------------% | |
% | |
% Implemtation of encodings in C for Windows and Posix | |
% | |
% On Windows Codepages are represented by integers, using the CP_XXX constants | |
% defined in the WinAPI. We will use this for Posix as well, to simplify the | |
% implementation | |
% | |
% CP_UTF8 = 65001 | |
% CP_UTF16 = 1200 | |
% | |
:- pragma foreign_decl("C", | |
" | |
#ifndef MR_WIN32 | |
#include <locale.h> /* for setlocale() */ | |
#include <string.h> /* for strlen() */ | |
#endif | |
#ifndef CP_UTF8 | |
#define CP_UTF8 65001 | |
#endif | |
#ifndef CP_UTF16 | |
#define CP_UTF16 1200 | |
#endif | |
#ifndef CP_UNDEFINED | |
#define CP_UNDEFINED -1 | |
#endif | |
"). | |
:- pragma foreign_type("C", encoding, "int") | |
where equality is encoding_equal. | |
:- pragma foreign_proc("C", | |
encoding_equal(EncodingA::in, EncodingB::in), | |
[will_not_call_mercury, promise_pure], | |
" | |
SUCCESS_INDICATOR = (EncodingA == EncodingB) ? MR_TRUE : MR_FALSE; | |
"). | |
:- pragma foreign_proc("C", | |
encoding_name(Encoding::in) = (Name::out), | |
[will_not_call_mercury, promise_pure], | |
" | |
switch (Encoding) { | |
case CP_UTF8: Name = (MR_String)""UTF-8""; break; | |
case CP_UTF16: Name = (MR_String)""UTF-16""; break; | |
default: Name = (MR_String)""<unknown>""; break; | |
} | |
"). | |
:- pragma foreign_proc("C", | |
string_encoding = (Encoding::out), | |
[will_not_call_mercury, promise_pure], | |
" | |
Encoding = CP_UTF8; | |
"). | |
:- pragma foreign_proc("C", | |
utf8_encoding = (Encoding::out), | |
[will_not_call_mercury, promise_pure], | |
" | |
Encoding = CP_UTF8; | |
"). | |
:- pragma foreign_proc("C", | |
get_output_encoding(Encoding::out, _IO0::di, _IO::uo), | |
[will_not_call_mercury, promise_pure], | |
" | |
#ifdef MR_WIN32 | |
Encoding = GetConsoleOutputCP(); | |
#else | |
size_t size; | |
char *locale; | |
locale = setlocale(LC_ALL, """"); | |
size = strlen(locale); | |
if (size >= 5 && | |
locale[size-5] == 'U' && | |
locale[size-4] == 'T' && | |
locale[size-3] == 'F' && | |
locale[size-2] == '-' && | |
locale[size-1] == '8') | |
{ | |
Encoding = CP_UTF8; | |
} else { | |
Encoding = CP_UNDEFINED; | |
} | |
#endif | |
"). | |
:- pragma foreign_proc("C", | |
set_output_encoding_priv(Encoding::in, Success::out, _IO0::di, _IO::uo), | |
[will_not_call_mercury, promise_pure], | |
" | |
#ifdef MR_WIN32 | |
Success = SetConsoleOutputCP(Encoding) ? MR_TRUE : MR_FALSE; | |
#else | |
Success = MR_FALSE; | |
#endif | |
"). | |
%----------------------------------------------------------------------------% | |
% | |
% Implemtation of encodings in C#. | |
% | |
% Essentially, the Microsoft .NET implementation just wraps the WinAPI | |
% functions (AFAIK) | |
% | |
:- pragma foreign_type("C#", encoding, "System.Text.Encoding") | |
where equality is encoding_equal. | |
:- pragma foreign_proc("C#", | |
encoding_equal(EndcodingA::in, EncodingB::in), | |
[will_not_call_mercury, promise_pure], | |
" | |
SUCCESS_INDICATOR = !object.ReferenceEquals(EndcodingA, null) | |
&& EndcodingA.Equals(EncodingB); | |
"). | |
:- pragma foreign_proc("C#", | |
encoding_name(Encoding::in) = (Name::out), | |
[will_not_call_mercury, promise_pure], | |
" | |
Name = Encoding.WebName.ToUpperInvariant(); | |
"). | |
:- pragma foreign_proc("C#", | |
string_encoding = (Encoding::out), | |
[will_not_call_mercury, promise_pure], | |
" | |
bool bigEndian = !System.BitConverter.IsLittleEndian; | |
Encoding = new System.Text.UnicodeEncoding(bigEndian, false); | |
"). | |
:- pragma foreign_proc("C#", | |
utf8_encoding = (Encoding::out), | |
[will_not_call_mercury, promise_pure], | |
" | |
Encoding = new System.Text.UTF8Encoding(false); | |
"). | |
:- pragma foreign_proc("C#", | |
get_output_encoding(Encoding::out, _IO0::di, _IO::uo), | |
[will_not_call_mercury, promise_pure], | |
" | |
Encoding = System.Console.OutputEncoding; | |
"). | |
:- pragma foreign_proc("C#", | |
set_output_encoding_priv(Encoding::in, Success::out, _IO0::di, _IO::uo), | |
[will_not_call_mercury, promise_pure], | |
" | |
try { | |
System.Console.OutputEncoding = Encoding; | |
Success = mr_bool.YES; | |
} catch { | |
Success = mr_bool.NO; | |
} | |
"). | |
%----------------------------------------------------------------------------% | |
% | |
% Implemtation of encodings in Java. | |
% | |
% Using java.nio.charset.Charset | |
% | |
% TODO: This is still incomplete | |
% | |
:- pragma foreign_type("Java", encoding, "java.nio.charset.Charset") | |
where equality is encoding_equal. | |
:- pragma foreign_proc("Java", | |
encoding_equal(EncodingA::in, EncodingB::in), | |
[will_not_call_mercury, promise_pure], | |
" | |
SUCCESS_INDICATOR = EncodingA != null && | |
EncodingA.equals(EncodingB); | |
"). | |
:- pragma foreign_proc("Java", | |
encoding_name(Encoding::in) = (Name::out), | |
[will_not_call_mercury, promise_pure], | |
" | |
Name = Encoding.name(); | |
"). | |
:- pragma foreign_proc("Java", | |
string_encoding = (Encoding::out), | |
[will_not_call_mercury, promise_pure], | |
" | |
Encoding = java.nio.charset.Charset.forName(""UTF-16""); | |
"). | |
:- pragma foreign_proc("Java", | |
utf8_encoding = (Encoding::out), | |
[will_not_call_mercury, promise_pure], | |
" | |
Encoding = java.nio.charset.Charset.forName(""UTF-8""); | |
"). | |
:- pragma foreign_proc("Java", | |
get_output_encoding(Encoding::out, _IO0::di, _IO::uo), | |
[will_not_call_mercury, promise_pure], | |
" | |
Encoding = java.nio.charset.Charset.forName( | |
System.getProperty(""file.encoding"")); | |
"). | |
%----------------------------------------------------------------------------% | |
main(!IO) :- | |
set_output_encoding(utf8_encoding, IsUtf8Encoding, !IO), | |
Writers = [ | |
"io.write_string"-io.write_string, | |
"native printing function"-stdprint, | |
"foldl(io.write_char)"-write_chars | |
], | |
( IsUtf8Encoding = yes -> | |
do_test(Writers, !IO) | |
; | |
unexpected($file, $pred, $grade ++ " does not support UTF-8") | |
), | |
( string_encoding = utf8_encoding -> | |
true | |
; | |
set_output_encoding(string_encoding, IsStringEncoding, !IO), | |
( IsStringEncoding = yes -> | |
do_test(Writers, !IO) | |
; | |
unexpected($file, $pred, | |
$grade ++ " does not support " ++ | |
encoding_name(string_encoding)) | |
) | |
). | |
%----------------------------------------------------------------------------% | |
% | |
% The `writer' predicate encapsulates the different I/O implementations for | |
% demonstrating different encoding behaviour on the OS standard output. | |
% | |
:- type writer_pred == pred(string, io, io). | |
:- inst writer_pred == (pred(in, di, uo) is det). | |
%----------------------------------------------------------------------------% | |
% | |
% `write_chars' folds over a string using io.write_char. | |
% | |
:- pred write_chars(string::in, io::di, io::uo) is det. | |
write_chars(Text, !IO) :- | |
list.foldl(io.write_char, to_char_list(Text), !IO). | |
%----------------------------------------------------------------------------% | |
% | |
% `do_test/3' uses the list of writers to iteratively test the string obtained | |
% from `func unicode_text/0'. | |
% | |
:- pred do_test(list(pair(string, writer_pred)), io, io). | |
:- mode do_test(in(list(pair(ground, writer_pred))), di, uo) is det. | |
do_test([], !IO). | |
do_test([Name-Writer | Writers], !IO) :- | |
Writer("\n\n", !IO), | |
Writer("Executing test ", !IO), | |
Writer(Name, !IO), | |
Writer(" in: ", !IO), | |
Writer($grade, !IO), | |
Writer(" codepage: ", !IO), | |
get_output_encoding(Encoding, !IO), | |
Writer(encoding_name(Encoding), !IO), | |
Writer("\n", !IO), | |
Writer(unicode_text, !IO), | |
do_test(Writers, !IO). | |
:- func unicode_text = string. | |
unicode_text = | |
"English: texts, web pages and documents | |
Graves,etc: à á â ã ä å æ ç è é ê ë ì í î ï | |
Greek: ΐ Α Β Γ Δ Ε Ζ Η Θ Ι Κ Λ Μ Ν Ξ Ο | |
Romanian: texte, pagini Web şi a documentelor | |
Vietnamese: văn bản, các trang web và các tài liệu | |
Russian: тексты, веб-страницы и документы | |
Japanese: テキスト、Webページや文書 | |
Korean: 텍스트, 웹 페이지 및 문서 | |
Chinese: 文本,網頁和文件 | |
". | |
:- initialise init/2. | |
:- pred init(io::di, io::uo) is det. | |
init(!IO) :- | |
get_output_encoding(OldEncoding, !IO), | |
type_to_univ(OldEncoding, Globals), | |
set_globals(Globals, !IO). | |
:- finalise fin/2. | |
:- pred fin(io::di, io::uo) is det. | |
fin(!IO) :- | |
get_globals(Globals, !IO), | |
( univ_to_type(Globals, OldEncoding) -> | |
set_output_encoding(OldEncoding, _, !IO) | |
; | |
unexpected($file, $pred, "Cannot cast univ Globals to encoding") | |
). | |
%----------------------------------------------------------------------------% | |
:- end_module win_unicode. | |
%----------------------------------------------------------------------------% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment