Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions Lib/test/test_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,17 +171,17 @@ def test_forced_io_encoding(self):
"stdout: {out_encoding}:ignore",
"stderr: {out_encoding}:backslashreplace",
"--- Set encoding only ---",
"Expected encoding: latin-1",
"Expected encoding: iso8859-1",
"Expected errors: default",
"stdin: latin-1:{errors}",
"stdout: latin-1:{errors}",
"stderr: latin-1:backslashreplace",
"stdin: iso8859-1:{errors}",
"stdout: iso8859-1:{errors}",
"stderr: iso8859-1:backslashreplace",
"--- Set encoding and errors ---",
"Expected encoding: latin-1",
"Expected encoding: iso8859-1",
"Expected errors: replace",
"stdin: latin-1:replace",
"stdout: latin-1:replace",
"stderr: latin-1:backslashreplace"])
"stdin: iso8859-1:replace",
"stdout: iso8859-1:replace",
"stderr: iso8859-1:backslashreplace"])
expected_output = expected_output.format(
in_encoding=expected_stream_encoding,
out_encoding=expected_stream_encoding,
Expand Down
6 changes: 3 additions & 3 deletions Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ def c_locale_get_error_handler(self, isolated=False, encoding=None):
'dump("stdout")',
'dump("stderr")',
))
args = [sys.executable, "-c", code]
args = [sys.executable, "-X", "utf8=0", "-c", code]
if isolated:
args.append("-I")
if encoding is not None:
Expand Down Expand Up @@ -712,8 +712,8 @@ def test_c_locale_surrogateescape(self):
# has no effect
out = self.c_locale_get_error_handler(encoding=':')
self.assertEqual(out,
'stdin: strict\n'
'stdout: strict\n'
'stdin: surrogateescape\n'
'stdout: surrogateescape\n'
'stderr: backslashreplace\n')
out = self.c_locale_get_error_handler(encoding='')
self.assertEqual(out,
Expand Down
12 changes: 6 additions & 6 deletions Lib/test/test_utf8_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,16 @@ def test_stdio(self):
out = self.get_output('-X', 'utf8', '-c', code,
PYTHONIOENCODING="latin1")
self.assertEqual(out.splitlines(),
['stdin: latin1/strict',
'stdout: latin1/strict',
'stderr: latin1/backslashreplace'])
['stdin: iso8859-1/strict',
'stdout: iso8859-1/strict',
'stderr: iso8859-1/backslashreplace'])

out = self.get_output('-X', 'utf8', '-c', code,
PYTHONIOENCODING=":namereplace")
self.assertEqual(out.splitlines(),
['stdin: UTF-8/namereplace',
'stdout: UTF-8/namereplace',
'stderr: UTF-8/backslashreplace'])
['stdin: utf-8/namereplace',
'stdout: utf-8/namereplace',
'stderr: utf-8/backslashreplace'])

def test_io(self):
code = textwrap.dedent('''
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Python now gets the locale encoding with C code to initialize the encoding
of standard streams like sys.stdout. Moreover, the encoding is now
initialized to the Python codec name to get a normalized encoding name and
to ensure that the codec is loaded. The change avoids importing _bootlocale
and _locale modules at startup by default.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix the error handler of standard streams like sys.stdout:
PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
"strict".
2 changes: 1 addition & 1 deletion Modules/_localemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ PyLocale_strxfrm(PyObject* self, PyObject* args)
static PyObject*
PyLocale_getdefaultlocale(PyObject* self, PyObject *Py_UNUSED(ignored))
{
char encoding[100];
char encoding[20];
char locale[100];

PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
Expand Down
4 changes: 2 additions & 2 deletions Programs/_testembed.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,9 @@ static int test_forced_io_encoding(void)
printf("--- Set errors only ---\n");
check_stdio_details(NULL, "ignore");
printf("--- Set encoding only ---\n");
check_stdio_details("latin-1", NULL);
check_stdio_details("iso8859-1", NULL);
printf("--- Set encoding and errors ---\n");
check_stdio_details("latin-1", "replace");
check_stdio_details("iso8859-1", "replace");

/* Check calling after initialization fails */
Py_Initialize();
Expand Down
86 changes: 63 additions & 23 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,22 +244,26 @@ get_codec_name(const char *encoding)
return NULL;
}

static char*
get_locale_encoding(void)
static _PyInitError
get_locale_encoding(char **locale_encoding)
{
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
char* codeset = nl_langinfo(CODESET);
if (!codeset || codeset[0] == '\0') {
PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
return NULL;
}
return get_codec_name(codeset);
#ifdef MS_WINDOWS
char encoding[20];
PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
#elif defined(__ANDROID__)
return get_codec_name("UTF-8");
const char *encoding = "UTF-8";
#else
PyErr_SetNone(PyExc_NotImplementedError);
return NULL;
const char *encoding = nl_langinfo(CODESET);
if (!encoding || encoding[0] == '\0') {
return _Py_INIT_USER_ERR("failed to get the locale encoding: "
"nl_langinfo(CODESET) failed");
}
#endif
*locale_encoding = _PyMem_RawStrdup(encoding);
if (*locale_encoding == NULL) {
return _Py_INIT_NO_MEMORY();
}
return _Py_INIT_OK();
}

static _PyInitError
Expand Down Expand Up @@ -397,7 +401,7 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = {
};

static const char *
get_default_standard_stream_error_handler(void)
get_stdio_errors(void)
{
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
if (ctype_loc != NULL) {
Expand All @@ -417,8 +421,7 @@ get_default_standard_stream_error_handler(void)
#endif
}

/* Otherwise return NULL to request the typical default error handler */
return NULL;
return "strict";
}

#ifdef PY_COERCE_C_LOCALE
Expand Down Expand Up @@ -1586,9 +1589,17 @@ initfsencoding(PyInterpreterState *interp)
Py_HasFileSystemDefaultEncoding = 1;
}
else {
Py_FileSystemDefaultEncoding = get_locale_encoding();
char *locale_encoding;
_PyInitError err = get_locale_encoding(&locale_encoding);
if (_Py_INIT_FAILED(err)) {
return err;
}

Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding);
PyMem_RawFree(locale_encoding);
if (Py_FileSystemDefaultEncoding == NULL) {
return _Py_INIT_ERR("Unable to get the locale encoding");
return _Py_INIT_ERR("failed to get the Python codec "
"of the locale encoding");
}

Py_HasFileSystemDefaultEncoding = 0;
Expand Down Expand Up @@ -1787,6 +1798,8 @@ init_sys_streams(PyInterpreterState *interp)
PyObject * encoding_attr;
char *pythonioencoding = NULL;
const char *encoding, *errors;
char *locale_encoding = NULL;
char *codec_name = NULL;
_PyInitError res = _Py_INIT_OK();

/* Hack to avoid a nasty recursion issue when Python is invoked
Expand Down Expand Up @@ -1838,21 +1851,46 @@ init_sys_streams(PyInterpreterState *interp)
errors = err;
}
}
if (*pythonioencoding && !encoding) {
if (!encoding && *pythonioencoding) {
encoding = pythonioencoding;
if (!errors) {
errors = "strict";
}
}
}
else if (interp->core_config.utf8_mode) {
encoding = "utf-8";
errors = "surrogateescape";

if (interp->core_config.utf8_mode) {
if (!encoding) {
encoding = "utf-8";
}
if (!errors) {
errors = "surrogateescape";
}
}

if (!errors && !pythonioencoding) {
if (!errors) {
/* Choose the default error handler based on the current locale */
errors = get_default_standard_stream_error_handler();
errors = get_stdio_errors();
}
}

if (encoding == NULL) {
_PyInitError err = get_locale_encoding(&locale_encoding);
if (_Py_INIT_FAILED(err)) {
return err;
}
encoding = locale_encoding;
}

codec_name = get_codec_name(encoding);
if (codec_name == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"failed to get the Python codec name "
"of stdio encoding");
goto error;
}
encoding = codec_name;

/* Set sys.stdin */
fd = fileno(stdin);
/* Under some conditions stdin, stdout and stderr may not be connected
Expand Down Expand Up @@ -1928,6 +1966,8 @@ init_sys_streams(PyInterpreterState *interp)

PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);

PyMem_RawFree(locale_encoding);
PyMem_RawFree(codec_name);
PyMem_Free(pythonioencoding);
Py_XDECREF(bimod);
Py_XDECREF(iomod);
Expand Down