/* Freetype GL - A C OpenGL Freetype engine
 *
 * Distributed under the OSI-approved BSD 2-Clause License.  See accompanying
 * file `LICENSE` for more details.
 */
#include <string.h>
#include "utf8-utils.h"

// ----------------------------------------------------- utf8_surrogate_len ---
/*
 * Return the length, in bytes, announced by the first byte of `character`.
 *
 * Only the first byte is inspected:
 *   - NULL pointer                 -> 0
 *   - high bit clear (0x00..0x7F)  -> 1
 *   - otherwise                    -> the number of leading 1 bits in the byte
 *
 * The byte is not validated as a legal UTF-8 lead byte, so a stray
 * continuation byte (10xxxxxx) yields 1 and 0xFF yields 8.
 */
size_t
utf8_surrogate_len( const char* character )
{
    size_t result = 0;
    unsigned char test_char;

    if (!character)
        return 0;

    // Use an unsigned copy: where plain char is signed, bytes >= 0x80 are
    // negative and left-shifting a negative value is undefined behaviour.
    test_char = (unsigned char) character[0];

    if ((test_char & 0x80) == 0)
        return 1;

    // Count the leading 1 bits by shifting them out through bit 7.
    while (test_char & 0x80)
    {
        test_char = (unsigned char) (test_char << 1);
        result++;
    }

    return result;
}

// ------------------------------------------------------------ utf8_strlen ---
/*
 * Count the characters in the NUL-terminated UTF-8 string `string`.
 *
 * Each character's byte length is taken from utf8_surrogate_len() applied to
 * its first byte. Returns 0 for a NULL pointer or an empty string.
 *
 * A multi-byte sequence cut short by the terminator counts as one character,
 * and scanning stops at the terminator instead of skipping over it.
 */
size_t
utf8_strlen( const char* string )
{
    const char* ptr = string;
    size_t result = 0;

    if (!string)
        return 0;

    while (*ptr)
    {
        size_t remaining = utf8_surrogate_len(ptr);

        // The loop condition guarantees the current byte is not the
        // terminator, so consuming it is always safe.
        ptr++;

        // Consume the rest of the sequence one byte at a time so that a
        // truncated sequence can never carry ptr past the terminating NUL.
        while (remaining > 1 && *ptr)
        {
            ptr++;
            remaining--;
        }

        result++;
    }

    return result;
}

// ---------------------------------------------------------- utf8_to_utf32 ---
/*
 * Decode the UTF-8 sequence starting at `character` into a single value.
 *
 * Returns:
 *   - (uint32_t) -1 when `character` is NULL
 *   - the byte itself for a single-byte (high bit clear) character
 *   - the decoded value for a 2- to 5-byte sequence
 *   - 0xFFFD when the first byte is not a recognised lead byte, or when a
 *     trailing byte is missing or not of the form 10xxxxxx
 *
 * Trailing bytes are checked one at a time and decoding stops at the first
 * invalid one, so a sequence cut short by the string terminator is never
 * read past.
 */
uint32_t
utf8_to_utf32( const char * character )
{
    const unsigned char * bytes = (const unsigned char *) character;
    uint32_t result;
    size_t trailing;
    size_t i;

    if( !character )
    {
        return (uint32_t) -1;
    }

    if( ( bytes[0] & 0x80 ) == 0x0 )
    {
        return bytes[0];
    }

    // Classify the lead byte: keep its payload bits and note how many
    // continuation bytes must follow.
    if( ( bytes[0] & 0xE0 ) == 0xC0 )
    {
        result = bytes[0] & 0x1F;
        trailing = 1;
    }
    else if( ( bytes[0] & 0xF0 ) == 0xE0 )
    {
        result = bytes[0] & 0x0F;
        trailing = 2;
    }
    else if( ( bytes[0] & 0xF8 ) == 0xF0 )
    {
        result = bytes[0] & 0x07;
        trailing = 3;
    }
    else if( ( bytes[0] & 0xFC ) == 0xF8 )
    {
        result = bytes[0] & 0x03;
        trailing = 4;
    }
    else
    {
        return 0xFFFD; // invalid character
    }

    for( i = 1; i <= trailing; ++i )
    {
        // A NUL terminator fails this test too, which is what stops the
        // decoder from reading beyond the end of a truncated sequence.
        if( ( bytes[i] & 0xC0 ) != 0x80 )
        {
            return 0xFFFD; // invalid character
        }

        result = ( result << 6 ) | ( bytes[i] & 0x3F );
    }

    return result;
}