Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/pylifecycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size);
#ifndef Py_LIMITED_API
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(void);
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
#endif

#ifdef __cplusplus
Expand Down
21 changes: 18 additions & 3 deletions Lib/test/test_c_locale_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import sys
import sysconfig
import shutil
import subprocess
from collections import namedtuple

import test.support
Expand All @@ -18,9 +17,12 @@
# Set our expectation for the default encoding used in the C locale
# for the filesystem encoding and the standard streams

# AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII
# While most *nix platforms default to ASCII in the C locale, some use a
# different encoding.
if sys.platform.startswith("aix"):
C_LOCALE_STREAM_ENCODING = "iso8859-1"
elif test.support.is_android:
C_LOCALE_STREAM_ENCODING = "utf-8"
else:
C_LOCALE_STREAM_ENCODING = "ascii"

Expand Down Expand Up @@ -301,6 +303,19 @@ def _check_c_locale_coercion(self,
# See https://bugs.python.org/issue30672 for discussion
if locale_to_set == "POSIX":
continue

# Platforms using UTF-8 in the C locale do not print
# CLI_COERCION_WARNING when all the locale environment variables are
# not set or set to the empty string.
_expected_warnings = expected_warnings
for _env_var in base_var_dict:
if base_var_dict[_env_var]:
break
else:
if (C_LOCALE_STREAM_ENCODING == "utf-8" and
locale_to_set == "" and coerce_c_locale == "warn"):
_expected_warnings = None

with self.subTest(env_var=env_var,
nominal_locale=locale_to_set,
PYTHONCOERCECLOCALE=coerce_c_locale):
Expand All @@ -312,7 +327,7 @@ def _check_c_locale_coercion(self,
self._check_child_encoding_details(var_dict,
fs_encoding,
stream_encoding,
expected_warnings,
_expected_warnings,
coercion_expected)

def test_test_PYTHONCOERCECLOCALE_not_set(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
A new internal ``_Py_SetLocaleFromEnv(category)`` helper function has been
added in order to improve the consistency of behaviour across different
``libc`` implementations (e.g. Android doesn't support setting the locale from
the environment by default).
2 changes: 1 addition & 1 deletion Modules/readline.c
Original file line number Diff line number Diff line change
Expand Up @@ -1245,7 +1245,7 @@ call_readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
char *saved_locale = strdup(setlocale(LC_CTYPE, NULL));
if (!saved_locale)
Py_FatalError("not enough memory to save locale");
setlocale(LC_CTYPE, "");
_Py_SetLocaleFromEnv(LC_CTYPE);
#endif

if (sys_stdin != rl_instream || sys_stdout != rl_outstream) {
Expand Down
9 changes: 1 addition & 8 deletions Programs/python.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,8 @@ main(int argc, char **argv)
return 1;
}

#ifdef __ANDROID__
/* Passing "" to setlocale() on Android requests the C locale rather
* than checking environment variables, so request C.UTF-8 explicitly
*/
setlocale(LC_ALL, "C.UTF-8");
#else
/* Reconfigure the locale to the default for this process */
setlocale(LC_ALL, "");
#endif
_Py_SetLocaleFromEnv(LC_ALL);

/* The legacy C locale assumes ASCII as the default text encoding, which
* causes problems not only for the CPython runtime, but also other
Expand Down
77 changes: 65 additions & 12 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
const char *newloc = target->locale_name;

/* Reset locale back to currently configured defaults */
setlocale(LC_ALL, "");
_Py_SetLocaleFromEnv(LC_ALL);

/* Set the relevant locale environment variable */
if (setenv("LC_CTYPE", newloc, 1)) {
Expand All @@ -472,7 +472,7 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
}

/* Reconfigure with the overridden environment variables */
setlocale(LC_ALL, "");
_Py_SetLocaleFromEnv(LC_ALL);
}
#endif

Expand Down Expand Up @@ -503,13 +503,14 @@ _Py_CoerceLegacyLocale(void)
const char *new_locale = setlocale(LC_CTYPE,
target->locale_name);
if (new_locale != NULL) {
#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET)
#if !defined(__APPLE__) && !defined(__ANDROID__) && \
defined(HAVE_LANGINFO_H) && defined(CODESET)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we go back to not calling the coercion function on Android at all, this adjustment won't be needed.

/* Also ensure that nl_langinfo works in this locale */
char *codeset = nl_langinfo(CODESET);
if (!codeset || *codeset == '\0') {
/* CODESET is not set or empty, so skip coercion */
new_locale = NULL;
setlocale(LC_CTYPE, "");
_Py_SetLocaleFromEnv(LC_CTYPE);
continue;
}
#endif
Expand All @@ -524,6 +525,65 @@ _Py_CoerceLegacyLocale(void)
#endif
}

/* Portable replacement for setlocale(category, "").
 *
 * Selects the locale for 'category' from the process environment while
 * hiding the differences between libc implementations.  The return value is
 * whatever the underlying setlocale() call returns.
 *
 * On every platform except Android this simply forwards to
 * setlocale(category, "").  On Android the environment is inspected by hand,
 * as described in the comments below. */
char *
_Py_SetLocaleFromEnv(int category)
{
#ifndef __ANDROID__
    return setlocale(category, "");
#else /* __ANDROID__ */
    const char *utf8_locale = "C.UTF-8";
    /* Only these variables are consulted, in this order of precedence. */
    const char *env_var_set[] = {"LC_ALL", "LC_CTYPE", "LANG"};
    const size_t env_var_count = sizeof(env_var_set) / sizeof(env_var_set[0]);
    size_t i;

    /* At API 24 and older, Android's setlocale(category, "") ignores the
     * environment variables and incorrectly selects the "C" locale, so the
     * lookup is done here instead.  The first variable holding a non-empty
     * value decides the outcome. */
    for (i = 0; i < env_var_count; i++) {
        const char *value = getenv(env_var_set[i]);
        int wants_utf8;

        if (value == NULL || value[0] == '\0') {
            /* Unset or empty: fall through to the next variable. */
            continue;
        }
        /* Only the two UTF-8 names below map to C.UTF-8; any other value
         * selects the plain "C" locale. */
        wants_utf8 = (strcmp(value, utf8_locale) == 0 ||
                      strcmp(value, "en_US.UTF-8") == 0);
        return setlocale(category, wants_utf8 ? utf8_locale : "C");
    }

    /* None of LC_ALL, LC_CTYPE or LANG holds a non-empty value.  Android
     * uses UTF-8, so C.UTF-8 is requested explicitly.  POSIX, section
     * "8.2 Internationalization Variables", item 4: "If the LANG environment
     * variable is not set or is set to the empty string, the
     * implementation-defined default locale shall be used." */
#ifdef PY_COERCE_C_LOCALE
    {
        const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");

        /* Unless coercion was disabled with PYTHONCOERCECLOCALE=0, export
         * LC_CTYPE as well so that other ported code which reads the
         * environment (e.g. extension modules) sees a value matching the
         * locale configured here. */
        if (coerce_c_locale == NULL || strcmp(coerce_c_locale, "0") != 0) {
            if (setenv("LC_CTYPE", utf8_locale, 1)) {
                fprintf(stderr, "Warning: failed setting the LC_CTYPE "
                                "environment variable to %s\n", utf8_locale);
            }
        }
    }
#endif
    return setlocale(category, utf8_locale);
#endif /* __ANDROID__ */
}


/* Global initializations. Can be undone by Py_Finalize(). Don't
call this twice without an intervening Py_Finalize() call.
Expand Down Expand Up @@ -599,19 +659,12 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
exit(1);
}

#ifdef __ANDROID__
/* Passing "" to setlocale() on Android requests the C locale rather
* than checking environment variables, so request C.UTF-8 explicitly
*/
setlocale(LC_CTYPE, "C.UTF-8");
#else
#ifndef MS_WINDOWS
/* Set up the LC_CTYPE locale, so we can obtain
the locale's charset without having to switch
locales. */
setlocale(LC_CTYPE, "");
_Py_SetLocaleFromEnv(LC_CTYPE);
_emit_stderr_warning_for_legacy_locale();
#endif
#endif

if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0')
Expand Down