Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Include/coreconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,18 @@ typedef struct {
If set to -1 (default), it is set to !Py_UnbufferedStdioFlag. */
int buffered_stdio;

/* Encoding of sys.stdin, sys.stdout and sys.stderr.
Value set from PYTHONIOENCODING environment variable and
Py_SetStandardStreamEncoding() function.
See also 'stdio_errors' attribute. */
char *stdio_encoding;

/* Error handler of sys.stdin and sys.stdout.
Value set from PYTHONIOENCODING environment variable and
Py_SetStandardStreamEncoding() function.
See also 'stdio_encoding' attribute. */
char *stdio_errors;

#ifdef MS_WINDOWS
/* If greater than 1, use the "mbcs" encoding instead of the UTF-8
encoding for the filesystem encoding.
Expand Down
3 changes: 3 additions & 0 deletions Include/pylifecycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config);
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
#endif
#ifdef Py_BUILD_CORE
PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);
#endif

#ifdef __cplusplus
}
Expand Down
36 changes: 33 additions & 3 deletions Lib/test/test_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,13 +288,29 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'quiet': 0,
'user_site_directory': 1,
'buffered_stdio': 1,
# None means that check_config() gets the expected encoding at runtime
'stdio_encoding': None,
'stdio_errors': None,

'_install_importlib': 1,
'_check_hash_pycs_mode': 'default',
'_frozen': 0,
}

def get_stdio_encoding(self, env):
    """Return ``[encoding, errors]`` of sys.stdout in a child Python.

    A fresh interpreter (sys.executable) is started with the environment
    *env*; it prints the encoding and the error handler of its sys.stdout.
    stderr is merged into stdout so that a failure message is captured too.
    Raise Exception if the child exits with a non-zero status.
    """
    script = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
    completed = subprocess.run(
        (sys.executable, '-c', script),
        env=env,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    if completed.returncode != 0:
        raise Exception(f"failed to get the stdio encoding: stdout={completed.stdout!r}")
    # The child printed "<encoding> <errors>": split on whitespace.
    return completed.stdout.rstrip().split()

def check_config(self, testname, expected):
expected = dict(self.DEFAULT_CONFIG, **expected)

env = dict(os.environ)
for key in list(env):
if key.startswith('PYTHON'):
Expand All @@ -303,13 +319,19 @@ def check_config(self, testname, expected):
# on the current locale
env['PYTHONCOERCECLOCALE'] = '0'
env['PYTHONUTF8'] = '0'
out, err = self.run_embedded_interpreter(testname, env=env)
# Ignore err

expected = dict(self.DEFAULT_CONFIG, **expected)
if expected['stdio_encoding'] is None or expected['stdio_errors'] is None:
res = self.get_stdio_encoding(env)
if expected['stdio_encoding'] is None:
expected['stdio_encoding'] = res[0]
if expected['stdio_errors'] is None:
expected['stdio_errors'] = res[1]
for key, value in expected.items():
expected[key] = str(value)

out, err = self.run_embedded_interpreter(testname, env=env)
# Ignore err

config = {}
for line in out.splitlines():
key, value = line.split(' = ', 1)
Expand All @@ -331,7 +353,11 @@ def test_init_global_config(self):
'verbose': 1,
'quiet': 1,
'buffered_stdio': 0,

'utf8_mode': 1,
'stdio_encoding': 'utf-8',
'stdio_errors': 'surrogateescape',

'user_site_directory': 0,
'_frozen': 1,
}
Expand All @@ -350,6 +376,8 @@ def test_init_from_config(self):
'malloc_stats': 1,

'utf8_mode': 1,
'stdio_encoding': 'iso8859-1',
'stdio_errors': 'replace',

'pycache_prefix': 'conf_pycache_prefix',
'program_name': './conf_program_name',
Expand Down Expand Up @@ -387,6 +415,8 @@ def test_init_env(self):
'write_bytecode': 0,
'verbose': 1,
'buffered_stdio': 0,
'stdio_encoding': 'iso8859-1',
'stdio_errors': 'replace',
'user_site_directory': 0,
'faulthandler': 1,
'dev_mode': 1,
Expand Down
8 changes: 8 additions & 0 deletions Programs/_testembed.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,8 @@ dump_config(void)
printf("user_site_directory = %i\n", config->user_site_directory);
printf("buffered_stdio = %i\n", config->buffered_stdio);
ASSERT_EQUAL(config->buffered_stdio, !Py_UnbufferedStdioFlag);
printf("stdio_encoding = %s\n", config->stdio_encoding);
printf("stdio_errors = %s\n", config->stdio_errors);

/* FIXME: test legacy_windows_fs_encoding */
/* FIXME: test legacy_windows_stdio */
Expand Down Expand Up @@ -532,6 +534,11 @@ static int test_init_from_config(void)
Py_UnbufferedStdioFlag = 0;
config.buffered_stdio = 0;

putenv("PYTHONIOENCODING=cp424");
Py_SetStandardStreamEncoding("ascii", "ignore");
config.stdio_encoding = "iso8859-1";
config.stdio_errors = "replace";

putenv("PYTHONNOUSERSITE=");
Py_NoUserSiteDirectory = 0;
config.user_site_directory = 0;
Expand Down Expand Up @@ -569,6 +576,7 @@ static void test_init_env_putenvs(void)
putenv("PYTHONNOUSERSITE=1");
putenv("PYTHONFAULTHANDLER=1");
putenv("PYTHONDEVMODE=1");
putenv("PYTHONIOENCODING=iso8859-1:replace");
/* FIXME: test PYTHONWARNINGS */
/* FIXME: test PYTHONEXECUTABLE */
/* FIXME: test PYTHONHOME */
Expand Down
181 changes: 179 additions & 2 deletions Python/coreconfig.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#include "Python.h"
#include "internal/pystate.h"
#include <locale.h>
#ifdef HAVE_LANGINFO_H
# include <langinfo.h>
#endif


#define DECODE_LOCALE_ERR(NAME, LEN) \
Expand Down Expand Up @@ -89,8 +92,8 @@ _Py_wstrlist_copy(int len, wchar_t **list)
* mechanism that attempts to figure out an appropriate IO encoding
*/

char *_Py_StandardStreamEncoding = NULL;
char *_Py_StandardStreamErrors = NULL;
static char *_Py_StandardStreamEncoding = NULL;
static char *_Py_StandardStreamErrors = NULL;

int
Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
Expand Down Expand Up @@ -205,6 +208,9 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
CLEAR(config->dll_path);
#endif
CLEAR(config->base_exec_prefix);

CLEAR(config->stdio_encoding);
CLEAR(config->stdio_errors);
#undef CLEAR
#undef CLEAR_WSTRLIST
}
Expand All @@ -216,6 +222,15 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
_PyCoreConfig_Clear(config);

#define COPY_ATTR(ATTR) config->ATTR = config2->ATTR
/* Copy a 'char *' attribute from config2 to config.
   If config2->ATTR is not NULL, config->ATTR receives a duplicate made with
   _PyMem_RawStrdup(); nothing is assigned when config2->ATTR is NULL.
   WARNING: on allocation failure, the macro makes the *enclosing function*
   return -1. */
#define COPY_STR_ATTR(ATTR) \
do { \
if (config2->ATTR != NULL) { \
config->ATTR = _PyMem_RawStrdup(config2->ATTR); \
if (config->ATTR == NULL) { \
return -1; \
} \
} \
} while (0)
#define COPY_WSTR_ATTR(ATTR) \
do { \
if (config2->ATTR != NULL) { \
Expand Down Expand Up @@ -287,6 +302,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
COPY_ATTR(quiet);
COPY_ATTR(user_site_directory);
COPY_ATTR(buffered_stdio);
COPY_STR_ATTR(stdio_encoding);
COPY_STR_ATTR(stdio_errors);
#ifdef MS_WINDOWS
COPY_ATTR(legacy_windows_fs_encoding);
COPY_ATTR(legacy_windows_stdio);
Expand Down Expand Up @@ -932,6 +949,161 @@ config_init_locale(_PyCoreConfig *config)
}


/* Select the default error handler of the standard streams.

   Return a pointer to a static string:
   - Windows: always "surrogateescape";
   - other platforms: "surrogateescape" if the current LC_CTYPE locale is
     "C" or "POSIX", or (when PY_COERCE_C_LOCALE is defined) if it is a
     locale coercion target; "strict" otherwise, including when the locale
     cannot be queried.

   The 'config' argument is currently not used. */
static const char *
get_stdio_errors(const _PyCoreConfig *config)
{
#ifdef MS_WINDOWS
    /* On Windows, always use surrogateescape by default */
    return "surrogateescape";
#else
    /* Passing NULL only queries the current locale, it does not change it */
    const char *ctype_loc = setlocale(LC_CTYPE, NULL);
    if (ctype_loc == NULL) {
        return "strict";
    }

    /* The legacy C and POSIX locales default to surrogateescape */
    if (!strcmp(ctype_loc, "C") || !strcmp(ctype_loc, "POSIX")) {
        return "surrogateescape";
    }

#ifdef PY_COERCE_C_LOCALE
    /* Locale coercion target locales default to surrogateescape as well */
    if (_Py_IsLocaleCoercionTarget(ctype_loc)) {
        return "surrogateescape";
    }
#endif

    return "strict";
#endif
}


/* Get the encoding of the current locale.

   On success, store a copy of the encoding name, allocated with
   _PyMem_RawStrdup(), into *locale_encoding and return _Py_INIT_OK().

   The encoding name is:
   - Windows: "cp" followed by the ANSI code page returned by GetACP();
   - Android: always "UTF-8";
   - other platforms: the result of nl_langinfo(CODESET). If it is NULL or
     an empty string, return a user error and leave *locale_encoding
     untouched.

   If the copy cannot be allocated, *locale_encoding is set to NULL and
   _Py_INIT_NO_MEMORY() is returned. */
_PyInitError
_Py_get_locale_encoding(char **locale_encoding)
{
#ifdef MS_WINDOWS
    char encoding[20];
    /* GetACP() returns an unsigned integer (UINT): format it with %u,
       not %d, so the conversion specifier matches the argument type. */
    PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
#elif defined(__ANDROID__)
    const char *encoding = "UTF-8";
#else
    const char *encoding = nl_langinfo(CODESET);
    if (!encoding || encoding[0] == '\0') {
        return _Py_INIT_USER_ERR("failed to get the locale encoding: "
                                 "nl_langinfo(CODESET) failed");
    }
#endif
    /* 'encoding' may be a stack buffer or point to storage that is not
       owned here: hand a private copy to the caller. */
    *locale_encoding = _PyMem_RawStrdup(encoding);
    if (*locale_encoding == NULL) {
        return _Py_INIT_NO_MEMORY();
    }
    return _Py_INIT_OK();
}


/* Set *attr to a copy of 'value' (made with _PyMem_RawStrdup()) if *attr is
   still NULL. Return 0 on success or if *attr was already set, -1 on memory
   allocation failure. 'value' must not be NULL. */
static int
config_set_str_if_unset(char **attr, const char *value)
{
    if (*attr != NULL) {
        return 0;
    }
    *attr = _PyMem_RawStrdup(value);
    return (*attr != NULL) ? 0 : -1;
}


/* Fill config->stdio_encoding and config->stdio_errors.

   A field which is already set is never overridden. The sources are
   consulted in this order, each one only filling the fields still NULL:

   1. Py_SetStandardStreamEncoding() parameters;
   2. PYTHONIOENCODING environment variable ("encoding", "encoding:errors"
      or ":errors"), read through _PyCoreConfig_GetEnv();
   3. UTF-8 Mode (config->utf8_mode): "utf-8" / "surrogateescape";
   4. the current locale: _Py_get_locale_encoding() for the encoding and
      get_stdio_errors() for the error handler.

   Return _Py_INIT_OK() on success, _Py_INIT_NO_MEMORY() on memory
   allocation failure, or the error of _Py_get_locale_encoding(). */
static _PyInitError
config_init_stdio_encoding(_PyCoreConfig *config)
{
    /* If Py_SetStandardStreamEncoding() has been called, use its
       parameters. */
    if (_Py_StandardStreamEncoding != NULL
        && config_set_str_if_unset(&config->stdio_encoding,
                                   _Py_StandardStreamEncoding) < 0) {
        return _Py_INIT_NO_MEMORY();
    }
    if (_Py_StandardStreamErrors != NULL
        && config_set_str_if_unset(&config->stdio_errors,
                                   _Py_StandardStreamErrors) < 0) {
        return _Py_INIT_NO_MEMORY();
    }

    /* Nothing left to compute: don't even look at the environment */
    if (config->stdio_encoding != NULL && config->stdio_errors != NULL) {
        return _Py_INIT_OK();
    }

    /* PYTHONIOENCODING environment variable */
    const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONIOENCODING");
    if (opt) {
        /* Work on a private copy: the string is split in place at ':' */
        char *pythonioencoding = _PyMem_RawStrdup(opt);
        if (pythonioencoding == NULL) {
            return _Py_INIT_NO_MEMORY();
        }

        /* Error handler part: NULL if missing or empty. It is const because
           it may point to the "strict" string literal below. */
        const char *errors = NULL;
        char *sep = strchr(pythonioencoding, ':');
        if (sep) {
            *sep = '\0';
            if (sep[1] != '\0') {
                errors = sep + 1;
            }
        }

        int res = 0;

        /* Does PYTHONIOENCODING contain an encoding? */
        if (pythonioencoding[0]) {
            res = config_set_str_if_unset(&config->stdio_encoding,
                                          pythonioencoding);

            /* If the encoding is set but not the error handler,
               use "strict" error handler by default.
               PYTHONIOENCODING=latin1 behaves as
               PYTHONIOENCODING=latin1:strict. */
            if (errors == NULL) {
                errors = "strict";
            }
        }

        if (res == 0 && errors != NULL) {
            res = config_set_str_if_unset(&config->stdio_errors, errors);
        }

        /* 'errors' may point into 'pythonioencoding': only release the
           buffer once both fields have been copied. */
        PyMem_RawFree(pythonioencoding);
        if (res < 0) {
            return _Py_INIT_NO_MEMORY();
        }
    }

    /* UTF-8 Mode uses UTF-8/surrogateescape */
    if (config->utf8_mode) {
        if (config_set_str_if_unset(&config->stdio_encoding, "utf-8") < 0
            || config_set_str_if_unset(&config->stdio_errors,
                                       "surrogateescape") < 0) {
            return _Py_INIT_NO_MEMORY();
        }
    }

    /* Last resort: choose the default encoding and the default error
       handler based on the current locale. */
    if (config->stdio_encoding == NULL) {
        _PyInitError err = _Py_get_locale_encoding(&config->stdio_encoding);
        if (_Py_INIT_FAILED(err)) {
            return err;
        }
    }
    if (config->stdio_errors == NULL) {
        const char *default_errors = get_stdio_errors(config);
        if (config_set_str_if_unset(&config->stdio_errors,
                                    default_errors) < 0) {
            return _Py_INIT_NO_MEMORY();
        }
    }

    return _Py_INIT_OK();
}


/* Read configuration settings from standard locations
*
* This function doesn't make any changes to the interpreter state - it
Expand Down Expand Up @@ -1044,6 +1216,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
config->argc = 0;
}

err = config_init_stdio_encoding(config);
if (_Py_INIT_FAILED(err)) {
return err;
}

assert(config->coerce_c_locale >= 0);
assert(config->use_environment >= 0);

Expand Down
Loading