のねのBlog

パソコンの問題や、ソフトウェアの開発で起きた問題など書いていきます。よろしくお願いします^^。

pango-language.c

pango/pango-language.c at master · GNOME/pango · GitHub

#include "config.h"
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <locale.h>

#include "pango-language.h"
#include "pango-impl-utils.h"

#ifdef HAVE_CORE_TEXT
#include <CoreFoundation/CoreFoundation.h>
#endif /* HAVE_CORE_TEXT */
/*
 * Private per-language data.
 *
 * pango_language_from_string() allocates one of these directly in front of
 * each canonical tag string, and pango_language_get_private() finds it again
 * by stepping back sizeof (PangoLanguagePrivate) bytes from the PangoLanguage
 * pointer.
 *
 * The two cache fields start out as (gconstpointer) -1, meaning "not looked
 * up yet"; find_best_lang_match_cached() replaces that with the lookup result
 * (which may be NULL when nothing matched).
 */
typedef struct {
  gconstpointer lang_info;       /* cached match in lang_texts */
  gconstpointer script_for_lang; /* cached match in pango_script_for_lang */

  int magic; /* Used for verification */
} PangoLanguagePrivate;

/* Value stored in PangoLanguagePrivate.magic by pango_language_private_init()
 * and checked by pango_language_get_private() to detect pointers that were
 * not produced by pango_language_from_string().
 */
#define PANGO_LANGUAGE_PRIVATE_MAGIC 0x0BE4DAD0

/*
 * pango_language_private_init:
 * @priv: private header to initialize
 *
 * Stamps @priv with the verification magic and marks both lookup caches as
 * "not computed yet" using the (gconstpointer) -1 sentinel.
 */
static void
pango_language_private_init (PangoLanguagePrivate *priv)
{
  gconstpointer const not_cached = (gconstpointer) -1;

  priv->lang_info = not_cached;
  priv->script_for_lang = not_cached;

  priv->magic = PANGO_LANGUAGE_PRIVATE_MAGIC;
}
/*
 * pango_language_get_private:
 * @language: a language tag, or NULL
 *
 * Recovers the private header that sits immediately before the tag string.
 *
 * Returns: the private header; NULL if @language is NULL or if the bytes in
 * front of it do not carry PANGO_LANGUAGE_PRIVATE_MAGIC (in which case a
 * critical warning is logged).
 */
static PangoLanguagePrivate *
pango_language_get_private (PangoLanguage *language) G_GNUC_CONST;

static PangoLanguagePrivate *
pango_language_get_private (PangoLanguage *language)
{
  PangoLanguagePrivate *priv;

  if (!language)
    return NULL;

  /* The header lives directly in front of the string. */
  priv = (PangoLanguagePrivate *) ((char *) language
                                   - sizeof (PangoLanguagePrivate));

  if (G_UNLIKELY (priv->magic != PANGO_LANGUAGE_PRIVATE_MAGIC))
    {
      /* Adjacent literals are concatenated by the compiler; a string literal
       * must not contain a raw line break.
       */
      g_critical ("Invalid PangoLanguage.  "
                  "Did you pass in a straight string instead of calling "
                  "pango_language_from_string()?");
      return NULL;
    }

  return priv;
}
/* Characters that separate entries in a list of language ranges/tags. */
#define LANGUAGE_SEPARATORS ";:, \t"

/*
 * Byte -> canonical byte table used to normalize language tags.
 *
 *  - '0'-'9' and 'a'-'z' map to themselves,
 *  - 'A'-'Z' map to the corresponding lowercase letter,
 *  - '-', '@' and '_' map to '-',
 *  - every other byte maps to 0, which the users of this table treat as the
 *    end of the tag.
 *
 * Only the first 128 entries are listed; the remaining entries are
 * zero-initialized.
 */
static const char canon_map[256] = {
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,  '-',  0,   0,
  '0', '1', '2', '3', '4', '5', '6', '7',  '8', '9',  0,   0,   0,   0,   0,   0,
  '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,  '-',
   0,  'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,   0
};
/*
 * lang_equal:
 * @v1: first string
 * @v2: second string
 *
 * Equality function for the language hash table: compares the two strings
 * byte by byte after mapping each byte through canon_map, stopping at the
 * first byte that maps to 0.
 *
 * Returns: TRUE if both strings have the same canonical form.
 */
static gboolean
lang_equal (gconstpointer v1,
            gconstpointer v2)
{
  const guchar *lhs = v1;
  const guchar *rhs = v2;

  for (;; lhs++, rhs++)
    {
      char canon_lhs = canon_map[*lhs];
      char canon_rhs = canon_map[*rhs];

      /* Stop at the end of the left tag or at the first difference. */
      if (canon_lhs == 0 || canon_lhs != canon_rhs)
        return (canon_lhs == canon_rhs);
    }
}
/*
 * lang_hash:
 * @key: string to hash
 *
 * Hash function for the language hash table. Hashes the canon_map image of
 * each byte (h = h * 31 + c), stopping at the first byte that maps to 0, so
 * that strings considered equal by lang_equal() hash identically.
 *
 * Returns: the hash value.
 */
static guint
lang_hash (gconstpointer key)
{
  const guchar *cursor;
  guint hash = 0;

  for (cursor = key; canon_map[*cursor]; cursor++)
    hash = hash * 31 + canon_map[*cursor];

  return hash;
}
/*
 * pango_language_copy:
 * @language: a language tag
 *
 * Copy function for the PangoLanguage boxed type.
 *
 * Returns: @language itself; no new object is created.
 */
static PangoLanguage *
pango_language_copy (PangoLanguage *language)
{
  /* language tags are const */
  return language;
}
/*
 * pango_language_free:
 * @language: a language tag (unused)
 *
 * Free function for the PangoLanguage boxed type. Does nothing.
 */
static void
pango_language_free (PangoLanguage *language G_GNUC_UNUSED)
{
  /* nothing */
}

/* Registers PangoLanguage as a GLib boxed type, using pango_language_copy()
 * and pango_language_free() as its copy and free functions.
 */
G_DEFINE_BOXED_TYPE (PangoLanguage, pango_language,
                     pango_language_copy,
                     pango_language_free);
#if defined(G_OS_WIN32) || defined(HAVE_CORE_TEXT)
/*
 * lc_ctype_from_environment:
 *
 * Returns: a newly allocated copy of the first of LC_ALL, LC_CTYPE and LANG
 * (checked in that order) that is set in the environment, or NULL when none
 * of them is set. Free with g_free().
 */
static gchar *
lc_ctype_from_environment (void)
{
  static const char * const variables[] = { "LC_ALL", "LC_CTYPE", "LANG" };
  guint i;

  for (i = 0; i < G_N_ELEMENTS (variables); i++)
    {
      const char *value = getenv (variables[i]);

      if (value != NULL)
        return g_strdup (value);
    }

  return NULL;
}
#endif /* G_OS_WIN32 || HAVE_CORE_TEXT */

/*
 * _pango_get_lc_ctype:
 *
 * Determines the locale name to use as the default language.
 *
 * Returns: a newly allocated string; free with g_free(). The Core Text and
 * generic branches fall back to "C" when no locale name can be obtained.
 */
static gchar *
_pango_get_lc_ctype (void)
{
#ifdef G_OS_WIN32
  /* Somebody might try to set the locale for this process using the
   * LANG or LC_ environment variables. The Microsoft C library
   * doesn't know anything about them. You set the locale in the
   * Control Panel. Setting these env vars won't have any effect on
   * locale-dependent C library functions like ctime(). But just for
   * kicks, do obey LC_ALL, LC_CTYPE and LANG in Pango. (This also makes
   * it easier to test GTK and Pango in various default languages, you
   * don't have to clickety-click in the Control Panel, you can simply
   * start the program with LC_ALL=something on the command line.)
   */
  gchar *from_env = lc_ctype_from_environment ();

  if (from_env != NULL)
    return from_env;

  return g_win32_getlocale ();
#elif defined(HAVE_CORE_TEXT)
  CFArrayRef languages;
  gchar ret[16];
  gboolean have_language = FALSE;
  const char *lc_ctype;
  gchar *from_env;

  /* Take the same approach as done for Windows above. First we check
   * if somebody tried to set the locale through environment variables.
   */
  from_env = lc_ctype_from_environment ();
  if (from_env != NULL)
    return from_env;

  /* If the environment variables are not set, determine the locale
   * through the platform-native API.
   */
  languages = CFLocaleCopyPreferredLanguages ();
  if (languages != NULL)
    {
      /* Index 0 is only valid when the list is non-empty. */
      if (CFArrayGetCount (languages) > 0)
        {
          CFStringRef language = CFArrayGetValueAtIndex (languages, 0);

          have_language = CFStringGetCString (language, ret, sizeof (ret),
                                              kCFStringEncodingUTF8);
        }

      CFRelease (languages);
    }

  if (have_language)
    return g_strdup (ret);

  /* Same fallback as the generic branch: never hand back NULL. */
  lc_ctype = setlocale (LC_CTYPE, NULL);
  return g_strdup (lc_ctype ? lc_ctype : "C");
#else
  {
    gchar *lc_ctype = setlocale (LC_CTYPE, NULL);

    if (lc_ctype)
      return g_strdup (lc_ctype);
    else
      return g_strdup ("C");
  }
#endif
}
/*
 * pango_language_get_default:
 *
 * Returns the default language, derived from the string produced by
 * _pango_get_lc_ctype(). The value is computed on the first call, guarded by
 * g_once_init_enter()/g_once_init_leave(), and the same pointer is returned
 * afterwards.
 *
 * Returns: the default language tag.
 */
PangoLanguage *
pango_language_get_default (void)
{
  static PangoLanguage *result = NULL; /* MT-safe */

  if (g_once_init_enter (&result))
    {
      gchar *locale_name = _pango_get_lc_ctype ();
      PangoLanguage *default_lang = pango_language_from_string (locale_name);

      /* The tag keeps its own canonical copy of the name. */
      g_free (locale_name);

      g_once_init_leave (&result, default_lang);
    }

  return result;
}
PangoLanguage *
pango_language_from_string (const char *language)
{
  G_LOCK_DEFINE_STATIC (lang_from_string);
  static GHashTable *hash = NULL; /* MT-safe */
  PangoLanguagePrivate *priv;
  char *result;
  int len;
  char *p;

  if (language == NULL)
    return NULL;

  G_LOCK (lang_from_string);

  if (G_UNLIKELY (!hash))
    hash = g_hash_table_new (lang_hash, lang_equal);
  else
    {
      result = g_hash_table_lookup (hash, language);
      if (result)
        goto out;
    }

  len = strlen (language);
  result = g_malloc0 (sizeof (PangoLanguagePrivate) + len + 1);
  g_assert (result);

  priv = (PangoLanguagePrivate *) result;
  result += sizeof (*priv);

  pango_language_private_init (priv);

  p = result;
  while ((*(p++) = canon_map[*(guchar *)language++]))
    ;

  g_hash_table_insert (hash, result, result);

out:
  G_UNLOCK (lang_from_string);

  return (PangoLanguage *)result;
}
/*
 * pango_language_to_string:
 * @language: a language tag
 *
 * Function form of pango_language_to_string().
 *
 * The name is wrapped in parentheses in the definition so that a
 * function-like macro of the same name is not expanded there, while the call
 * in the body is.
 * NOTE(review): presumably pango-language.h defines
 * pango_language_to_string() as a macro that yields the tag as a
 * const char *; without such a macro this body would call itself
 * endlessly -- confirm against the header.
 */
const char *
(pango_language_to_string) (PangoLanguage *language)
{
  return pango_language_to_string (language);
}
/*
 * pango_language_matches:
 * @language: a language tag, may be NULL
 * @range_list: list of language ranges separated by any of
 *   LANGUAGE_SEPARATORS
 *
 * Checks @language against each range in @range_list in turn. A range
 * matches if it begins with '*', or if it is a prefix of the tag that is
 * followed in the tag by either the end of the string or a '-'.
 *
 * Returns: TRUE if any range matches, FALSE otherwise.
 */
gboolean
pango_language_matches (PangoLanguage *language,
                        const char    *range_list)
{
  const char *tag = pango_language_to_string (language);
  const char *range = range_list;

  for (;;)
    {
      const char *range_end = strpbrk (range, LANGUAGE_SEPARATORS);
      gboolean is_last = (range_end == NULL);
      gsize range_len;

      /* No further separator: this range runs to the end of the list. */
      if (is_last)
        range_end = range + strlen (range);

      range_len = range_end - range;

      if (range[0] == '*')
        return TRUE;

      if (tag != NULL &&
          strncmp (tag, range, range_len) == 0 &&
          (tag[range_len] == '\0' || tag[range_len] == '-'))
        return TRUE;

      if (is_last)
        return FALSE;

      range = range_end + 1;
    }
}
/*
 * lang_compare_first_component:
 * @pa: first string
 * @pb: second string
 *
 * Comparison function (usable with bsearch()) that orders two tags by their
 * first component only, i.e. the part before the first '-'. The longer of
 * the two first components decides how many bytes are compared.
 *
 * Returns: negative, zero or positive, as strncmp() does.
 */
static int
lang_compare_first_component (gconstpointer pa,
                              gconstpointer pb)
{
  const char *lhs = pa;
  const char *rhs = pb;
  const char *dash;
  unsigned int lhs_len, rhs_len;

  dash = strchr (lhs, '-');
  if (dash != NULL)
    lhs_len = (unsigned int) (dash - lhs);
  else
    lhs_len = strlen (lhs);

  dash = strchr (rhs, '-');
  if (dash != NULL)
    rhs_len = (unsigned int) (dash - rhs);
  else
    rhs_len = strlen (rhs);

  return strncmp (lhs, rhs, lhs_len > rhs_len ? lhs_len : rhs_len);
}
/*
 * find_best_lang_match:
 * @language: a language tag, or NULL
 * @records: array of records, each of which starts with a NUL-terminated
 *   language string; searched with bsearch() using
 *   lang_compare_first_component(), so it has to be ordered accordingly
 * @num_records: number of records in @records
 * @record_size: size of one record in bytes
 *
 * Looks for the record that best matches @language. Among the run of records
 * sharing the tag's first component, the run is scanned from its last record
 * backwards and the first record accepted by pango_language_matches() wins.
 *
 * Returns: pointer to the matching record, or NULL if @language is NULL or
 * nothing matches.
 */
static gconstpointer
find_best_lang_match (PangoLanguage *language,
                      gconstpointer  records,
                      guint          num_records,
                      guint          record_size)
{
  const char *lang_str;
  const char *record, *start, *end;

  if (language == NULL)
    return NULL;

  lang_str = pango_language_to_string (language);

  record = bsearch (lang_str,
                    records, num_records, record_size,
                    lang_compare_first_component);
  if (!record)
    return NULL;

  start = (const char *) records;
  end   = start + num_records * record_size;

  /* find the best match among all those that have the same first-component */

  /* go to the final one matching in the first component */
  while (record < end - record_size &&
         lang_compare_first_component (lang_str, record + record_size) == 0)
    record += record_size;

  /* go back, find which one matches completely */
  while (lang_compare_first_component (lang_str, record) == 0)
    {
      if (pango_language_matches (language, record))
        return record;

      /* Stop at the first record rather than stepping to an address in
       * front of the array: forming such a pointer is undefined behaviour.
       */
      if (record == start)
        break;

      record -= record_size;
    }

  return NULL;
}
/*
 * find_best_lang_match_cached:
 * @language: a language tag, or NULL
 * @cache: location caching the result for @language, or NULL for no caching
 * @records: record array, as for find_best_lang_match()
 * @num_records: number of records
 * @record_size: size of one record in bytes
 *
 * Wrapper around find_best_lang_match() that remembers the result in @cache.
 * A cache value of (gconstpointer) -1 means "not computed yet"; any other
 * value, NULL included, is returned as is.
 *
 * Returns: the matching record, or NULL.
 */
static gconstpointer
find_best_lang_match_cached (PangoLanguage *language,
                             gconstpointer *cache,
                             gconstpointer  records,
                             guint          num_records,
                             guint          record_size)
{
  /* Without a cache slot, just do the lookup. */
  if (cache == NULL)
    return find_best_lang_match (language, records, num_records, record_size);

  if (G_UNLIKELY (*cache == (gconstpointer) -1))
    *cache = find_best_lang_match (language, records, num_records, record_size);

  return *cache;
}
/*
 * FIND_BEST_LANG_MATCH_CACHED:
 * @language: a language tag (expanded more than once, so it must not have
 *   side effects)
 * @cache_key: name of the PangoLanguagePrivate member used as cache
 * @records: a real array (not a pointer) of records
 *
 * Calls find_best_lang_match_cached() on @records, using the @cache_key
 * member of @language's private data as cache, or no cache at all when the
 * private data cannot be obtained.
 *
 * Every line of the definition but the last must end in a backslash. The
 * expansion is a plain expression without a trailing semicolon; the caller
 * supplies it.
 */
#define FIND_BEST_LANG_MATCH_CACHED(language, cache_key, records) \
	    find_best_lang_match_cached ((language), \
            pango_language_get_private (language) ? \
            &(pango_language_get_private (language)->cache_key) : NULL, \
            (records), \
            G_N_ELEMENTS (records), \
            sizeof (*(records)))
/*
 * One entry of the lang_texts table.
 *
 * The language string has to stay the first member: find_best_lang_match()
 * hands the record pointer to string functions as if it were the tag itself.
 */
typedef struct {
  char lang[6];   /* language tag, NUL-terminated */
  guint16 offset; /* byte offset of the sample string inside lang_pool */
} LangInfo;

/* Pure black magic, based on appendix of dsohowto.pdf */
/* POOLSTRFIELD (__LINE__) expands to the identifier "str" followed by the
 * current line number; the extra macro level makes __LINE__ expand before
 * it is pasted.
 */
#define POOLSTRFIELD(line) POOLSTRFIELD1(line)
#define POOLSTRFIELD1(line) str##line
/* One char array member per LANGUAGE() entry of the included table, each
 * sized to hold its sample string, preceded by a one-byte str0 member.
 */
struct _LangPoolStruct {
  char str0[1];
#define LANGUAGE(id, source, sample) char POOLSTRFIELD(__LINE__)[sizeof(sample)];
#include "pango-language-sample-table.h"
#undef LANGUAGE
};

/* The sample strings themselves, stored back to back. The union allows the
 * pool to be addressed as a flat character array through .str, which is how
 * pango_language_get_sample_string() reads it (lang_pool.str + offset).
 */
static const union _LangPool {
  struct _LangPoolStruct lang_pool_struct;
  const char str[1];
} lang_pool = { {
    "",
#define LANGUAGE(id, source, sample) sample,
#include "pango-language-sample-table.h"
#undef LANGUAGE
} };
/*
 * Table mapping each language of pango-language-sample-table.h to the offset
 * of its sample string inside lang_pool.
 *
 * The LANGUAGE() definition has to form a single logical line, so every line
 * of it but the last ends in a backslash. POOLSTRFIELD (__LINE__) names the
 * struct _LangPoolStruct member declared for the same line of the included
 * table, and G_STRUCT_OFFSET() turns that member into a byte offset.
 *
 * NOTE(review): the table is searched with bsearch() through
 * find_best_lang_match(), so the included entries presumably have to be
 * sorted, with the "~~" sentinel ordering after all of them -- confirm
 * against the table header.
 */
static const LangInfo lang_texts[] = {
#define LANGUAGE(id, source, sample) \
  { \
    G_STRINGIFY(id), \
    G_STRUCT_OFFSET(struct _LangPoolStruct, POOLSTRFIELD(__LINE__)) \
  },
#include "pango-language-sample-table.h"
#undef LANGUAGE
  /* One extra entry with no final comma, to make it C89-happy */
 {"~~",	0}
};
/*
 * pango_language_get_sample_string:
 * @language: a language tag, or NULL to use the default language
 *
 * Looks up sample text for @language in lang_texts, caching the lookup in
 * the tag's private data.
 *
 * Returns: the sample string stored in lang_pool for the best matching
 * entry, or a fixed English pangram when no entry matches. The returned
 * string is static data and must not be freed.
 */
const char *
pango_language_get_sample_string (PangoLanguage *language)
{
  const LangInfo *info;

  if (language == NULL)
    language = pango_language_get_default ();

  /* The second argument names the PangoLanguagePrivate member used as cache. */
  info = FIND_BEST_LANG_MATCH_CACHED (language, lang_info, lang_texts);

  if (info == NULL)
    return "The quick brown fox jumps over the lazy dog.";

  return lang_pool.str + info->offset;
}
/*
 * From language to script
 */
#include "pango-script-lang-table.h"
/*
 * pango_language_get_scripts:
 * @language: a language tag, or NULL
 * @num_scripts: location to store the number of scripts, or NULL
 *
 * Looks up the scripts recorded for @language in pango_script_for_lang,
 * caching the lookup in the tag's private data.
 *
 * Returns: the scripts array of the matching table entry (static data, not
 * to be freed), with *@num_scripts set to the number of leading non-zero
 * entries; or NULL with *@num_scripts set to 0 when there is no matching
 * entry.
 */
const PangoScript *
pango_language_get_scripts (PangoLanguage *language,
                            int           *num_scripts)
{
  const PangoScriptForLang *entry;

  entry = FIND_BEST_LANG_MATCH_CACHED (language,
                                       script_for_lang,
                                       pango_script_for_lang);

  if (entry == NULL)
    {
      if (num_scripts != NULL)
        *num_scripts = 0;

      return NULL;
    }

  if (num_scripts != NULL)
    {
      unsigned int count = 0;

      /* The used part of the array ends at the first zero entry, or at the
       * end of the array if it is completely filled.
       */
      while (count < G_N_ELEMENTS (entry->scripts) &&
             entry->scripts[count] != 0)
        count++;

      g_assert (count > 0);

      *num_scripts = count;
    }

  return entry->scripts;
}
/*
 * pango_language_includes_script:
 * @language: a language tag, or NULL
 * @script: a script
 *
 * Determines whether @script is one of the scripts listed for @language.
 *
 * Returns: TRUE if @script is listed for @language. Also TRUE when @script
 * is not greater than PANGO_SCRIPT_INHERITED or equals
 * PANGO_SCRIPT_UNKNOWN, and when no script list is available for @language.
 * FALSE otherwise.
 */
gboolean
pango_language_includes_script (PangoLanguage *language,
                                PangoScript    script)
{
  const PangoScript *scripts;
  int num_scripts, i;

  /* Same "real script" test as the one in pango-script.c, inverted. */
  if (script <= PANGO_SCRIPT_INHERITED || script == PANGO_SCRIPT_UNKNOWN)
    return TRUE;

  scripts = pango_language_get_scripts (language, &num_scripts);
  if (scripts == NULL)
    return TRUE;

  i = 0;
  while (i < num_scripts && scripts[i] != script)
    i++;

  return i < num_scripts;
}
/*
 * From script to language
 */
/*
 * parse_default_languages:
 *
 * Reads the PANGO_LANGUAGE environment variable, or LANGUAGE if that is not
 * set, and splits it at any of LANGUAGE_SEPARATORS. Empty entries and the
 * entry "C" are skipped; every other entry is converted with
 * pango_language_from_string().
 *
 * Returns: a newly allocated, NULL-terminated array of language tags, or
 * NULL if neither variable is set.
 */
static PangoLanguage **
parse_default_languages (void)
{
  const char *env;
  char *buffer, *token;
  GArray *langs;

  env = getenv ("PANGO_LANGUAGE");

  if (env == NULL)
    env = getenv ("LANGUAGE");

  if (env == NULL)
    return NULL;

  /* Work on a private copy, since the separators get overwritten. */
  buffer = g_strdup (env);

  langs = g_array_new (TRUE, FALSE, sizeof (PangoLanguage *));

  token = buffer;
  while (token != NULL)
    {
      char *separator = strpbrk (token, LANGUAGE_SEPARATORS);
      char *next = NULL;

      if (separator != NULL)
        {
          *separator = '\0';
          next = separator + 1;
        }

      /* skip empty languages, and skip the language 'C' */
      if (token[0] != '\0' && strcmp (token, "C") != 0)
        {
          PangoLanguage *l = pango_language_from_string (token);

          g_array_append_val (langs, l);
        }

      token = next;
    }

  g_free (buffer);

  return (PangoLanguage **) g_array_free (langs, FALSE);
}
/*
 * _pango_script_get_default_language:
 * @script: a script
 *
 * Finds the first language returned by parse_default_languages() for which
 * pango_language_includes_script() accepts @script. The language list is
 * parsed on the first call, and results (including "none") are remembered
 * per script in a hash table. All of this happens under a static lock.
 *
 * Returns: the language found, or NULL if there is no language list or no
 * language in it includes @script.
 */
static PangoLanguage *
_pango_script_get_default_language (PangoScript script)
{
  G_LOCK_DEFINE_STATIC (languages);
  static gboolean initialized = FALSE; /* MT-safe */
  static PangoLanguage * const * languages = NULL; /* MT-safe */
  static GHashTable *hash = NULL; /* MT-safe */
  PangoLanguage *found = NULL;
  gpointer key = GINT_TO_POINTER (script);

  G_LOCK (languages);

  if (G_UNLIKELY (!initialized))
    {
      languages = parse_default_languages ();

      /* The per-script cache is only needed when there is a list. */
      if (languages)
        hash = g_hash_table_new (NULL, NULL);

      initialized = TRUE;
    }

  if (languages != NULL &&
      !g_hash_table_lookup_extended (hash, key,
                                     NULL, (gpointer *) (gpointer) &found))
    {
      PangoLanguage * const * candidate = languages;

      /* Stops at the first suitable language or at the NULL terminator. */
      while (*candidate != NULL &&
             !pango_language_includes_script (*candidate, script))
        candidate++;

      found = *candidate;

      g_hash_table_insert (hash, key, found);
    }

  G_UNLOCK (languages);

  return found;
}
/*
 * pango_script_get_sample_language:
 * @script: a script
 *
 * Picks a language to represent @script: the language found by
 * _pango_script_get_default_language() if there is one, otherwise the entry
 * for @script in the built-in sample_languages table.
 *
 * Returns: a language tag, or NULL if @script is negative, lies beyond the
 * end of the table, or has neither a default language nor a non-empty
 * table entry.
 */
PangoLanguage *
pango_script_get_sample_language (PangoScript script)
{
  static const char sample_languages[][4] = {
    "",    /* PANGO_SCRIPT_COMMON */
  };

  PangoLanguage *lang;
  const char *tag;

  g_return_val_if_fail (script >= 0, NULL);

  if ((guint)script >= G_N_ELEMENTS (sample_languages))
    return NULL;

  lang = _pango_script_get_default_language (script);
  if (lang == NULL)
    {
      tag = sample_languages[script];

      /* An empty table entry means there is no sample language. */
      if (tag[0] != '\0')
        lang = pango_language_from_string (tag);
    }

  return lang;
}