Skip to content

Instantly share code, notes, and snippets.

@sinnlosses
Created November 9, 2016 11:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sinnlosses/6bfba928e6f972e62c2678bfc490f957 to your computer and use it in GitHub Desktop.
Save sinnlosses/6bfba928e6f972e62c2678bfc490f957 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <wchar.h>
/*
 * Classifies the value c into a length of 1, 2, 3 or 4 using the
 * thresholds 0x80, 0xE0 and 0xF0 (see the definition further down).
 */
int print_size_char(wchar_t c);
/*
 * Reads the UTF-8 text file named by argv[1] and writes "output.txt" with
 * every space removed and a single space inserted after each remaining
 * character; newlines are copied through unchanged.
 *
 * Both streams are opened with the ",ccs=UTF-8" mode extension, so they are
 * wide-oriented and fgetwc() already yields whole decoded characters. No
 * byte-length lookahead is needed to keep a multi-byte character together.
 *
 * Returns 0 on success and -1 when the argument is missing, a file cannot
 * be opened, reading fails, or the output cannot be flushed on close.
 */
int main(int argc,char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <input-file>\n",
                argc > 0 ? argv[0] : "program");
        return -1;
    }

    FILE *fin = fopen(argv[1], "rb,ccs=UTF-8");
    if (fin == NULL) {
        return -1;
    }

    FILE *fout = fopen("output.txt", "wb,ccs=UTF-8");
    if (fout == NULL) {
        fclose(fin);
        return -1;
    }
    printf("ファイル読み込み完了\n");

    int rc = 0;
    int i = 0;
    wint_t c; /* wint_t, not wchar_t: it must be able to hold WEOF */

    while ((c = fgetwc(fin)) != WEOF) {
        if (c == L' ') {
            /* Existing spaces are dropped and not counted. */
            continue;
        }

        if (c == L'\n') {
            fputwc(L'\n', fout);
        } else {
            fputwc((wchar_t)c, fout);

            /*
             * Where wchar_t is a 16-bit UTF-16 unit, a character outside
             * the BMP arrives as a surrogate pair; keep both halves in the
             * same token instead of splitting them with a space.
             */
            if (c >= 0xD800 && c <= 0xDBFF) {
                wint_t low = fgetwc(fin);
                if (low != WEOF && low >= 0xDC00 && low <= 0xDFFF) {
                    fputwc((wchar_t)low, fout);
                } else {
                    /* Not a low surrogate: hand it back to the main loop.
                       Pushing back WEOF is defined to be a no-op. */
                    ungetwc(low, fin);
                }
            }

            fputwc(L' ', fout);
        }

        i++;
        if (i % 200000 == 0) {
            printf("%d_clear\n", i);
        }
    }

    /* WEOF is returned for read/decoding errors as well as end of file. */
    if (ferror(fin) || ferror(fout)) {
        rc = -1;
    }

    fclose(fin);
    /* fclose flushes buffered output, so its result matters for fout. */
    if (fclose(fout) != 0) {
        rc = -1;
    }
    return rc;
}
/*
 * Maps the value c onto a length between 1 and 4:
 *   c <  0x80          -> 1
 *   0x80 <= c < 0xE0   -> 2
 *   0xE0 <= c < 0xF0   -> 3
 *   c >= 0xF0          -> 4
 * The thresholds correspond to the byte count announced by the first byte
 * of a UTF-8 sequence when c holds that byte's value.
 */
int print_size_char(wchar_t c)
{
    /* Test from the highest threshold down so each check is one compare. */
    if (c >= 0xF0) {
        return 4;
    }
    if (c >= 0xE0) {
        return 3;
    }
    if (c >= 0x80) {
        return 2;
    }
    return 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment