pango-language.c
pango/pango-language.c at master · GNOME/pango · GitHub
#include "config.h" #include <errno.h> #include <string.h> #include <stdlib.h> #include <math.h> #include <locale.h> #include "pango-language.h" #include "pango-impl-utils.h" #ifdef HAVE_CORE_TEXT #include <CoreFoundation/CoreFoundation.h> #endif /* HAVE_CORE_TEXT */
typedef struct { gconstpointer lang_info; gconstpointer script_for_lang; int magic; /* Used for verification */ } PangoLanguagePrivate; #define PANGO_LANGUAGE_PRIVATE_MAGIC 0x0BE4DAD0 static void pango_language_private_init (PangoLanguagePrivate *priv) { priv->magic = PANGO_LANGUAGE_PRIVATE_MAGIC; priv->lang_info = (gconstpointer) -1; priv->script_for_lang = (gconstpointer) -1; }
static PangoLanguagePrivate *
pango_language_get_private (PangoLanguage *language) G_GNUC_CONST;

/*
 * Returns the PangoLanguagePrivate record that sits immediately before
 * the given language tag in memory.
 *
 * Returns NULL when language is NULL.  When the bytes in front of the
 * pointer do not carry the expected magic number, a critical warning is
 * logged and NULL is returned as well.
 */
static PangoLanguagePrivate *
pango_language_get_private (PangoLanguage *language)
{
  PangoLanguagePrivate *header;

  if (language == NULL)
    return NULL;

  /* The record lives exactly sizeof (PangoLanguagePrivate) bytes before
   * the tag string. */
  header = (PangoLanguagePrivate *) ((char *) language - sizeof (PangoLanguagePrivate));

  if (G_UNLIKELY (header->magic != PANGO_LANGUAGE_PRIVATE_MAGIC))
    {
      g_critical ("Invalid PangoLanguage. Did you pass in a straight string instead of calling pango_language_from_string()?");
      header = NULL;
    }

  return header;
}
/* Characters that separate entries in a list of language ranges or in a
 * language list taken from the environment. */
#define LANGUAGE_SEPARATORS ";:, \t"

/*
 * Byte-to-byte canonicalization table for language tags, indexed by
 * unsigned character value (the index comments assume ASCII):
 *   - digits and '-' map to themselves,
 *   - 'A'..'Z' and 'a'..'z' both map to the lowercase letter,
 *   - '@' and '_' map to '-',
 *   - every other byte maps to 0, which the users of this table treat as
 *     the end of the tag.
 * Only the first 128 entries are spelled out; the remaining entries of the
 * 256-element array are implicitly zero.
 */
static const char canon_map[256] = {
  /* 0x00 - 0x0F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 - 0x1F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 - 0x2F: only '-' is kept */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0,
  /* 0x30 - 0x3F: digits */
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0,
  /* 0x40 - 0x4F: '@' becomes '-', uppercase letters are folded */
  '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50 - 0x5F: uppercase letters are folded, '_' becomes '-' */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-',
  /* 0x60 - 0x6F: lowercase letters */
  0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70 - 0x7F: lowercase letters */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0
};
/*
 * Equality callback for the language hash table.
 *
 * Two strings are equal when their canon_map translations agree byte for
 * byte up to and including the first byte that canon_map turns into 0.
 */
static gboolean
lang_equal (gconstpointer v1,
	    gconstpointer v2)
{
  const guchar *lhs = v1;
  const guchar *rhs = v2;

  for (;;)
    {
      char lhs_canon = canon_map[*lhs];
      char rhs_canon = canon_map[*rhs];

      if (lhs_canon != rhs_canon)
	return FALSE;

      /* Both sides ended at the same position. */
      if (lhs_canon == 0)
	return TRUE;

      lhs++;
      rhs++;
    }
}
/*
 * Hash callback for the language hash table.
 *
 * Hashes the canon_map translation of the key, stopping at the first byte
 * that canon_map turns into 0, so that strings considered equal by
 * lang_equal() hash to the same value.
 */
static guint
lang_hash (gconstpointer key)
{
  const guchar *cursor;
  guint hash = 0;

  for (cursor = key; canon_map[*cursor] != 0; cursor++)
    hash = (hash << 5) - hash + canon_map[*cursor];

  return hash;
}
/*
 * Boxed-type copy function: hands back the very same pointer, because
 * language tags are const and are never duplicated.
 */
static PangoLanguage *
pango_language_copy (PangoLanguage *language)
{
  /* language tags are const */
  return language;
}

/*
 * Boxed-type free function: intentionally does nothing, matching the
 * copy function above which never allocates.
 */
static void
pango_language_free (PangoLanguage *language G_GNUC_UNUSED)
{
  /* nothing */
}

G_DEFINE_BOXED_TYPE (PangoLanguage, pango_language,
                     pango_language_copy,
                     pango_language_free);
static gchar * _pango_get_lc_ctype (void) { #ifdef G_OS_WIN32 /* Somebody might try to set the locale for this process using the * LANG or LC_ environment variables. The Microsoft C library * doesn't know anything about them. You set the locale in the * Control Panel. Setting these env vars won't have any affect on * locale-dependent C library functions like ctime(). But just for * kicks, do obey LC_ALL, LC_CTYPE and LANG in Pango. (This also makes * it easier to test GTK and Pango in various default languages, you * don't have to clickety-click in the Control Panel, you can simply * start the program with LC_ALL=something on the command line.) */ gchar *p; p = getenv ("LC_ALL"); if (p != NULL) return g_strdup (p); p = getenv ("LC_CTYPE"); if (p != NULL) return g_strdup (p); p = getenv ("LANG"); if (p != NULL) return g_strdup (p); return g_win32_getlocale (); #elif defined(HAVE_CORE_TEXT) CFArrayRef languages; CFStringRef language; gchar ret[16]; gchar *p; /* Take the same approach as done for Windows above. First we check * if somebody tried to set the locale through environment variables. */ p = getenv ("LC_ALL"); if (p != NULL) return g_strdup (p); p = getenv ("LC_CTYPE"); if (p != NULL) return g_strdup (p); p = getenv ("LANG"); if (p != NULL) return g_strdup (p); /* If the environment variables are not set, determine the locale * through the platform-native API. */ languages = CFLocaleCopyPreferredLanguages (); language = CFArrayGetValueAtIndex (languages, 0); if (!CFStringGetCString (language, ret, 16, kCFStringEncodingUTF8)) { CFRelease (languages); return g_strdup (setlocale (LC_CTYPE, NULL)); } CFRelease (languages); return g_strdup (ret); #else { gchar *lc_ctype = setlocale (LC_CTYPE, NULL); if (lc_ctype) return g_strdup (lc_ctype); else return g_strdup ("C"); } #endif }
/*
 * pango_language_get_default:
 *
 * Returns the PangoLanguage built from the string reported by
 * _pango_get_lc_ctype().  The value is computed on the first call inside
 * a g_once_init_enter()/g_once_init_leave() section and the stored
 * pointer is returned by every later call.
 */
PangoLanguage *
pango_language_get_default (void)
{
  static PangoLanguage *result = NULL; /* MT-safe */

  if (g_once_init_enter (&result))
    {
      gchar *locale_name = _pango_get_lc_ctype ();
      PangoLanguage *detected = pango_language_from_string (locale_name);

      /* The language object keeps its own copy of the tag. */
      g_free (locale_name);

      g_once_init_leave (&result, detected);
    }

  return result;
}
PangoLanguage * pango_language_from_string (const char *language) { G_LOCK_DEFINE_STATIC (lang_from_string); static GHashTable *hash = NULL; /* MT-safe */ PangoLanguagePrivate *priv; char *result; int len; char *p; if (language == NULL) return NULL; G_LOCK (lang_from_string); if (G_UNLIKELY (!hash)) hash = g_hash_table_new (lang_hash, lang_equal); else { result = g_hash_table_lookup (hash, language); if (result) goto out; } len = strlen (language); result = g_malloc0 (sizeof (PangoLanguagePrivate) + len + 1); g_assert (result); priv = (PangoLanguagePrivate *) result; result += sizeof (*priv); pango_language_private_init (priv); p = result; while ((*(p++) = canon_map[*(guchar *)language++])) ; g_hash_table_insert (hash, result, result); out: G_UNLOCK (lang_from_string); return (PangoLanguage *)result; }
/*
 * Out-of-line definition of pango_language_to_string().
 *
 * The function name is wrapped in parentheses so that a function-like
 * macro with the same name is not expanded at the point of definition.
 * NOTE(review): the body presumably expands such a macro declared in
 * pango-language.h; if no macro of that name exists this call would be
 * plain self-recursion - confirm against the header.
 */
const char *
(pango_language_to_string) (PangoLanguage *language)
{
  return pango_language_to_string (language);
}
/*
 * pango_language_matches:
 * @language: a language tag, may be NULL
 * @range_list: a list of language ranges separated by any of the
 *   characters in LANGUAGE_SEPARATORS
 *
 * Checks @language against each range in @range_list.  A range matches
 * when it starts with '*', or when it equals the tag or is a prefix of
 * the tag that is followed by '-' in the tag.  A NULL @language can only
 * be matched by a '*' range.
 *
 * Returns: TRUE as soon as one range matches, FALSE if none does.
 */
gboolean
pango_language_matches (PangoLanguage *language,
			const char    *range_list)
{
  const char *tag = pango_language_to_string (language);
  const char *range = range_list;

  for (;;)
    {
      /* Length of the current range: up to the next separator or the
       * end of the list.
       */
      size_t range_len = strcspn (range, LANGUAGE_SEPARATORS);

      if (range[0] == '*')
	return TRUE;

      if (tag != NULL &&
	  strncmp (tag, range, range_len) == 0 &&
	  (tag[range_len] == '\0' || tag[range_len] == '-'))
	return TRUE;

      /* No separator after this range: it was the last one. */
      if (range[range_len] == '\0')
	return FALSE;

      range += range_len + 1;
    }
}
/*
 * bsearch()-style comparison of two language tags that looks only at
 * their first component, i.e. the part before the first '-'.
 *
 * The comparison covers as many bytes as the longer of the two first
 * components, so "en" and "eng" compare as different.
 */
static int
lang_compare_first_component (gconstpointer pa,
			      gconstpointer pb)
{
  const char *lhs = pa;
  const char *rhs = pb;
  unsigned int lhs_len, rhs_len, span;

  /* Number of bytes before the first '-' (or the whole string). */
  lhs_len = strcspn (lhs, "-");
  rhs_len = strcspn (rhs, "-");

  span = lhs_len > rhs_len ? lhs_len : rhs_len;

  return strncmp (lhs, rhs, span);
}
/*
 * find_best_lang_match:
 * @language: the language to look up, or NULL
 * @records: array of fixed-size records, each beginning with a
 *   NUL-terminated language string
 * @num_records: number of records in @records
 * @record_size: size of one record in bytes
 *
 * Finds the record that best matches @language.  The array is searched
 * with bsearch() on the first tag component, so it has to be ordered
 * accordingly.  Among all records sharing that first component, the one
 * closest to the end of the array that pango_language_matches() accepts
 * is returned.
 *
 * Returns: pointer to the matching record, or NULL if @language is NULL
 *   or no record matches.
 */
static gconstpointer
find_best_lang_match (PangoLanguage *language,
		      gconstpointer  records,
		      guint          num_records,
		      guint          record_size)
{
  const char *lang_str;
  const char *record, *start, *end;

  if (language == NULL)
    return NULL;

  lang_str = pango_language_to_string (language);

  record = bsearch (lang_str,
		    records, num_records, record_size,
		    lang_compare_first_component);
  if (!record)
    return NULL;

  start = (const char *) records;
  end   = start + (gsize) num_records * record_size;

  /* find the best match among all those that have the same first-component */

  /* go to the final one matching in the first component */
  while (record < end - record_size &&
	 lang_compare_first_component (lang_str, record + record_size) == 0)
    record += record_size;

  /* go back, find which one matches completely */
  for (;;)
    {
      if (lang_compare_first_component (lang_str, record) != 0)
	break;

      if (pango_language_matches (language, record))
	return record;

      /* Stop at the first record instead of stepping to a position in
       * front of the array, which would be out-of-bounds pointer
       * arithmetic.
       */
      if (record == start)
	break;

      record -= record_size;
    }

  return NULL;
}
/*
 * Caching front end for find_best_lang_match().
 *
 * @cache may be NULL, in which case the lookup is simply performed.
 * Otherwise *@cache holds either a previous result (which may be NULL)
 * or the sentinel (gconstpointer) -1 meaning "not looked up yet"; in the
 * latter case the lookup is performed and its result stored in *@cache.
 */
static gconstpointer
find_best_lang_match_cached (PangoLanguage *language,
			     gconstpointer *cache,
			     gconstpointer  records,
			     guint          num_records,
			     guint          record_size)
{
  gconstpointer match;

  if (cache == NULL)
    return find_best_lang_match (language, records, num_records, record_size);

  if (G_LIKELY (*cache != (gconstpointer) -1))
    return *cache;

  match = find_best_lang_match (language, records, num_records, record_size);
  *cache = match;

  return match;
}
/*
 * Looks up `language` in the record array `records`, caching the result
 * in the `cache_key` member of the language's PangoLanguagePrivate
 * record.  When the language has no private record (NULL or invalid
 * language) the lookup is done without a cache.
 *
 * `records` must be a real array (not a pointer), since its length is
 * taken with G_N_ELEMENTS.  `language` is evaluated more than once, so it
 * must not have side effects.
 *
 * The expansion is a plain expression without a trailing semicolon; the
 * caller supplies the ';'.
 */
#define FIND_BEST_LANG_MATCH_CACHED(language, cache_key, records) \
	find_best_lang_match_cached ((language), \
				     pango_language_get_private (language) ? \
				       &(pango_language_get_private (language)->cache_key) : NULL, \
				     (records), \
				     G_N_ELEMENTS (records), \
				     sizeof (*(records)))
/*
 * One entry of the sample-text index: a language id and the byte offset
 * of its sample string inside lang_pool.
 */
typedef struct {
  char lang[6];
  guint16 offset;
} LangInfo;

/* Pure black magic, based on appendix of dsohowto.pdf */
/* POOLSTRFIELD(__LINE__) builds a field name of the form str<line>; the
 * two-level macro makes sure __LINE__ is expanded before pasting. */
#define POOLSTRFIELD(line) POOLSTRFIELD1(line)
#define POOLSTRFIELD1(line) str##line
/*
 * Layout of the string pool: a one-byte str0 followed by one char array
 * per LANGUAGE() entry of the included table, each sized to hold its
 * sample string.
 */
struct _LangPoolStruct {
  char str0[1];
#define LANGUAGE(id, source, sample) char POOLSTRFIELD(__LINE__)[sizeof(sample)];
#include "pango-language-sample-table.h"
#undef LANGUAGE
};

/*
 * The pool itself.  The union lets the same storage be initialized field
 * by field (through lang_pool_struct) and addressed as a flat character
 * array (through str) using the offsets stored in lang_texts.
 */
static const union _LangPool {
  struct _LangPoolStruct lang_pool_struct;
  const char str[1];
} lang_pool = {
  {
    "",
#define LANGUAGE(id, source, sample) sample,
#include "pango-language-sample-table.h"
#undef LANGUAGE
  }
};

/*
 * Index into the pool: for every LANGUAGE() entry, its id as a string and
 * the offset of the corresponding pool field.
 * NOTE(review): find_best_lang_match() searches this array with bsearch(),
 * so the included table presumably lists ids in sorted order - confirm in
 * pango-language-sample-table.h.
 */
static const LangInfo lang_texts[] = {
#define LANGUAGE(id, source, sample) { G_STRINGIFY(id), G_STRUCT_OFFSET(struct _LangPoolStruct, POOLSTRFIELD(__LINE__)) },
#include "pango-language-sample-table.h"
#undef LANGUAGE
  /* One extra entry with no final comma, to make it C89-happy */
  {"~~", 0}
};
/*
 * pango_language_get_sample_string:
 * @language: a language, or NULL to use pango_language_get_default()
 *
 * Looks up the sample text recorded for @language in lang_texts (the
 * lookup result is cached on the language).
 *
 * Returns: the sample string stored in lang_pool for the best matching
 *   entry, or a fixed English pangram when no entry matches.  The
 *   returned string is static data.
 */
const char *
pango_language_get_sample_string (PangoLanguage *language)
{
  const LangInfo *entry;

  if (language == NULL)
    language = pango_language_get_default ();

  entry = FIND_BEST_LANG_MATCH_CACHED (language,
				       lang_info,
				       lang_texts);

  if (entry == NULL)
    return "The quick brown fox jumps over the lazy dog.";

  return lang_pool.str + entry->offset;
}
/*
 * From language to script
 */

#include "pango-script-lang-table.h"

/*
 * pango_language_get_scripts:
 * @language: a language, may be NULL
 * @num_scripts: location to store the number of scripts, or NULL
 *
 * Looks up the entry for @language in pango_script_for_lang (the lookup
 * result is cached on the language).
 *
 * Returns: the scripts array of the matching entry, or NULL when there is
 *   no matching entry.  When @num_scripts is given it receives the number
 *   of leading non-zero elements of that array, or 0 when NULL is
 *   returned.
 */
const PangoScript *
pango_language_get_scripts (PangoLanguage *language,
			    int           *num_scripts)
{
  const PangoScriptForLang *entry;

  entry = FIND_BEST_LANG_MATCH_CACHED (language,
				       script_for_lang,
				       pango_script_for_lang);

  if (entry == NULL)
    {
      if (num_scripts != NULL)
	*num_scripts = 0;

      return NULL;
    }

  if (num_scripts != NULL)
    {
      unsigned int count = 0;

      /* The list ends at the first zero element or at the end of the
       * fixed-size array, whichever comes first.
       */
      while (count < G_N_ELEMENTS (entry->scripts) &&
	     entry->scripts[count] != 0)
	count++;

      g_assert (count > 0);

      *num_scripts = count;
    }

  return entry->scripts;
}
/*
 * pango_language_includes_script:
 * @language: a language, may be NULL
 * @script: a script
 *
 * Tells whether @script is among the scripts listed for @language.
 *
 * Returns: TRUE when @script is not a "real" script (it is not greater
 *   than PANGO_SCRIPT_INHERITED, or it is PANGO_SCRIPT_UNKNOWN), when
 *   nothing is known about @language, or when @script appears in the
 *   list from pango_language_get_scripts(); FALSE otherwise.
 */
gboolean
pango_language_includes_script (PangoLanguage *language,
				PangoScript    script)
{
  const PangoScript *known_scripts;
  int count, i;

  /* Same "real script" test as the one in pango-script.c, written out
   * in negated form.
   */
  if (script <= PANGO_SCRIPT_INHERITED || script == PANGO_SCRIPT_UNKNOWN)
    return TRUE;

  known_scripts = pango_language_get_scripts (language, &count);
  if (known_scripts == NULL)
    return TRUE;

  i = 0;
  while (i < count)
    {
      if (known_scripts[i] == script)
	return TRUE;
      i++;
    }

  return FALSE;
}
/* * From script to language */ static PangoLanguage ** parse_default_languages (void) { char *p, *p_copy; gboolean done = FALSE; GArray *langs; p = getenv ("PANGO_LANGUAGE"); if (p == NULL) p = getenv ("LANGUAGE"); if (p == NULL) return NULL; p_copy = p = g_strdup (p); langs = g_array_new (TRUE, FALSE, sizeof (PangoLanguage *)); while (!done) { char *end = strpbrk (p, LANGUAGE_SEPARATORS); if (!end) { end = p + strlen (p); done = TRUE; } else *end = '\0'; /* skip empty languages, and skip the language 'C' */ if (p != end && !(p + 1 == end && *p == 'C')) { PangoLanguage *l = pango_language_from_string (p); g_array_append_val (langs, l); } if (!done) p = end + 1; } g_free (p_copy); return (PangoLanguage **) g_array_free (langs, FALSE); }
/*
 * Returns the first language from the environment-provided list (see
 * parse_default_languages()) for which
 * pango_language_includes_script() is TRUE for @script, or NULL when no
 * list is configured or no language in it qualifies.
 *
 * The list is parsed on the first call, and every answer (including
 * NULL) is remembered in a hash table keyed by script.  All of this
 * state is accessed with the `languages` lock held.
 */
static PangoLanguage *
_pango_script_get_default_language (PangoScript script)
{
  G_LOCK_DEFINE_STATIC (languages);
  static gboolean initialized = FALSE; /* MT-safe */
  static PangoLanguage * const * languages = NULL; /* MT-safe */
  static GHashTable *hash = NULL; /* MT-safe */
  PangoLanguage *result;

  G_LOCK (languages);

  if (G_UNLIKELY (!initialized))
    {
      languages = parse_default_languages ();

      /* The table is only needed when there is a list to search. */
      if (languages)
	hash = g_hash_table_new (NULL, NULL);

      initialized = TRUE;
    }

  if (languages == NULL)
    {
      result = NULL;
    }
  else if (!g_hash_table_lookup_extended (hash, GINT_TO_POINTER (script), NULL, (gpointer *) (gpointer) &result))
    {
      PangoLanguage * const * candidate = languages;

      /* Stop at the first language covering the script, or at the
       * terminating NULL of the list.
       */
      while (*candidate != NULL &&
	     !pango_language_includes_script (*candidate, script))
	candidate++;

      result = *candidate;

      g_hash_table_insert (hash, GINT_TO_POINTER (script), result);
    }

  G_UNLOCK (languages);

  return result;
}
/*
 * pango_script_get_sample_language:
 * @script: a script
 *
 * Picks a language to represent @script.  A language from the
 * environment-provided list (see _pango_script_get_default_language())
 * takes precedence; otherwise the entry for @script in the built-in
 * sample_languages table is used.
 *
 * Returns: a PangoLanguage, or NULL when @script is negative or outside
 *   the table, or when neither source provides a language for it.
 */
PangoLanguage *
pango_script_get_sample_language (PangoScript script)
{
  /* Indexed by script value; an empty string means "no sample language". */
  static const char sample_languages[][4] = {
    "",    /* PANGO_SCRIPT_COMMON */
  };
  PangoLanguage *preferred;
  const char *fallback;

  g_return_val_if_fail (script >= 0, NULL);

  if ((guint)script >= G_N_ELEMENTS (sample_languages))
    return NULL;

  preferred = _pango_script_get_default_language (script);
  if (preferred != NULL)
    return preferred;

  fallback = sample_languages[script];
  if (fallback[0] == '\0')
    return NULL;

  return pango_language_from_string (fallback);
}