Skip to content

Instantly share code, notes, and snippets.

@jimregan
Last active November 3, 2016 06:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jimregan/774410e7e81879b01bbe689d4cb8d3ed to your computer and use it in GitHub Desktop.
Save jimregan/774410e7e81879b01bbe689d4cb8d3ed to your computer and use it in GitHub Desktop.
Irish strlwr
/*
* Copyright 2016 Trinity College, Dublin
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
/*
 * Checks whether the UTF-8 string s begins with an upper case vowel from
 * the set used in Irish: plain A, E, I, O, U, or the fada (acute) forms
 * whose UTF-8 encoding is the lead byte 0xC3 followed by 0x81, 0x89, 0x8D,
 * 0x93 or 0x9A.
 *
 * Returns 1 on a match and 0 otherwise (including when s is NULL or empty).
 * The string is only read, never modified.
 */
static int begins_with_upper_vowel(const char* s)
{
    if (s == NULL) {
        return 0;
    }

    switch (s[0]) {
    case 'A':
    case 'E':
    case 'I':
    case 'O':
    case 'U':
        return 1;
    case '\xC3':
        /* possible two-byte vowel: decided by the second byte below */
        break;
    default:
        return 0;
    }

    /*
     * s[0] is not the terminator, so s[1] is readable; if the string ends
     * here s[1] is '\0', which matches none of the cases.
     */
    switch (s[1]) {
    case '\x81':
    case '\x89':
    case '\x8D':
    case '\x93':
    case '\x9A':
        return 1;
    default:
        return 0;
    }
}
/*
 * Maps the second UTF-8 byte of an upper case Irish fada vowel
 * (the byte that follows 0xC3 in the encodings of the five upper case
 * fada vowels) to the second byte of the matching lower case vowel.
 * Any other value is handed back unchanged.
 *
 * Values are compared and returned as plain char values promoted to int,
 * i.e. in the same form as the character constants '\x81', '\xa1', etc.
 */
static int tolower_fada(int ch)
{
    /* upper_tail[i] pairs with lower_tail[i] */
    static const char upper_tail[] = { '\x81', '\x89', '\x8D', '\x93', '\x9A' };
    static const char lower_tail[] = { '\xa1', '\xa9', '\xad', '\xb3', '\xba' };
    size_t i;

    for (i = 0; i < sizeof upper_tail; i++) {
        if (ch == upper_tail[i]) {
            return lower_tail[i];
        }
    }
    return ch;
}
/*
 * Returns a newly allocated, lower-cased copy of the UTF-8 string s.
 *
 * ASCII letters A-Z are lower-cased, and the second byte of every
 * 0xC3-prefixed two-byte sequence is passed through tolower_fada().
 * When a word starts with a lower case 't' or 'n' immediately followed by
 * an upper case vowel (as tested by begins_with_upper_vowel()), a hyphen
 * is inserted after the prefix: "tATHAIR" becomes "t-athair".
 *
 * A word starts at the beginning of the string and after any byte that is
 * neither an ASCII letter nor part of a 0xC3-prefixed sequence.
 *
 * The input is not modified. The caller owns the result and must free()
 * it. Returns NULL if s is NULL or if the allocation fails.
 */
char* strlwr_irish(char* s)
{
    if (s == NULL) {
        return NULL;
    }

    /*
     * Only the prefix branch emits more bytes than it consumes (two for
     * one), so twice the input length plus the terminator always suffices.
     */
    size_t size = strlen(s);
    char* ret = malloc((size * 2) + 1);
    if (ret == NULL) {
        return NULL;
    }

    char* p = ret;
    int start = 1; /* non-zero while positioned at the start of a word */

    while (*s != '\0') {
        if (start && (*s == 't' || *s == 'n')
                && begins_with_upper_vowel(s + 1)) {
            *p++ = *s++;
            *p++ = '-';
            start = 0;
        } else if (*s >= 'A' && *s <= 'Z') {
            *p++ = (char) tolower((unsigned char) *s++);
            start = 0;
        } else if (*s == '\xC3') {
            *p++ = *s++;
            /*
             * A truncated sequence (0xC3 as the very last byte) must not
             * consume the terminator, or the loop would run past the end
             * of the input.
             */
            if (*s != '\0') {
                *p++ = (char) tolower_fada(*s++);
            }
            start = 0;
        } else if (*s >= 'a' && *s <= 'z') {
            *p++ = *s++;
            start = 0;
        } else {
            /* anything else separates words */
            *p++ = *s++;
            start = 1;
        }
    }
    *p = '\0';

    return ret;
}
/*
 * Small demonstration driver: lower-cases two sample strings with
 * strlwr_irish() and prints the inputs, the results, and the byte lengths
 * of the second input and its result.
 *
 * Returns 0 on success, EXIT_FAILURE if a conversion could not be allocated.
 */
int main(int argc, char** argv)
{
    /* command-line arguments are not used */
    (void) argc;
    (void) argv;

    fprintf(stdout, "Starting\n");

    char* test1 = "tATHAIR";
    char* test2 = "nÁÉÍÓÚR tATHAIR tATHAIR";
    char* out1 = strlwr_irish(test1);
    char* out2 = strlwr_irish(test2);

    /* a NULL result must not reach printf("%s") or strlen() */
    if (out1 == NULL || out2 == NULL) {
        fprintf(stderr, "strlwr_irish failed\n");
        free(out1);
        free(out2);
        return EXIT_FAILURE;
    }

    printf("%s\n%s\n", test1, test2);
    printf("%s\n%s\n", out1, out2);
    /* strlen() returns size_t, which needs %zu rather than %d */
    printf("%zu %zu\n", strlen(test2), strlen(out2));

    free(out1);
    free(out2);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment