From b0534cbf05221b141ebd2edb5a71e94742b369a3 Mon Sep 17 00:00:00 2001
From: James Falcon <james.falcon@canonical.com>
Date: Tue, 14 Jun 2022 06:24:40 -0500
Subject: [PATCH] Remove schema errors from log

When schema errors are encountered, the section of userdata in question
gets printed to the cloud-init log. As this could contain sensitive
data, log a generic warning instead and redirect the user to run
`cloud-init schema --system` as root.

LP: #1978422
---
 cloudinit/cmd/main.py                       |  4 +++-
 cloudinit/config/schema.py                  | 15 +++++++++++---
 tests/integration_tests/modules/test_cli.py | 20 ++++++++++++------
 tests/unittests/config/test_schema.py       | 23 ++++++++++++++++++++-
 4 files changed, 51 insertions(+), 11 deletions(-)

--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -454,7 +454,9 @@ def main_init(name, args):
 
     # Validate user-data adheres to schema definition
     if os.path.exists(init.paths.get_ipath_cur("userdata_raw")):
-        validate_cloudconfig_schema(config=init.cfg, strict=False)
+        validate_cloudconfig_schema(
+            config=init.cfg, strict=False, log_details=False
+        )
     else:
         LOG.debug("Skipping user-data validation. No user-data found.")
 
--- a/cloudinit/config/schema.py
+++ b/cloudinit/config/schema.py
@@ -196,6 +196,7 @@ def validate_cloudconfig_schema(
     schema: dict = None,
     strict: bool = False,
     strict_metaschema: bool = False,
+    log_details: bool = True,
 ):
     """Validate provided config meets the schema definition.
 
@@ -208,6 +209,9 @@ def validate_cloudconfig_schema(
        logging warnings.
     @param strict_metaschema: Boolean, when True validates schema using strict
        metaschema definition at runtime (currently unused)
+    @param log_details: Boolean, when True logs details of validation errors.
+       If there are concerns about logging sensitive userdata, this should
+       be set to False.
 
     @raises: SchemaValidationError when provided config does not validate
         against the provided schema.
@@ -232,12 +236,17 @@ def validate_cloudconfig_schema(
         errors += ((path, error.message),)
     if errors:
         if strict:
+            # This could output/log sensitive data
             raise SchemaValidationError(errors)
-        else:
+        if log_details:
             messages = ["{0}: {1}".format(k, msg) for k, msg in errors]
-            LOG.warning(
-                "Invalid cloud-config provided:\n%s", "\n".join(messages)
+            details = "\n" + "\n".join(messages)
+        else:
+            details = (
+                "Please run 'sudo cloud-init schema --system' to "
+                "see the schema errors."
             )
+        LOG.warning("Invalid cloud-config provided: %s", details)
 
 
 def annotated_cloudconfig_file(
--- a/tests/integration_tests/modules/test_cli.py
+++ b/tests/integration_tests/modules/test_cli.py
@@ -18,11 +18,18 @@ runcmd:
   - echo 'hi' > /var/tmp/test
 """
 
+# The '-' in 'hashed-password' fails schema validation
 INVALID_USER_DATA_SCHEMA = """\
 #cloud-config
-updates:
- notnetwork: -1
-apt_pipelining: bogus
+users:
+  - default
+  - name: newsuper
+    gecos: Big Stuff
+    groups: users, admin
+    sudo: ALL=(ALL) NOPASSWD:ALL
+    hashed-password: asdfasdf
+    shell: /bin/bash
+    lock_passwd: true
 """
 
 
@@ -69,11 +76,12 @@ def test_invalid_userdata_schema(client:
     assert result.ok
     log = client.read_from_file("/var/log/cloud-init.log")
     warning = (
-        "[WARNING]: Invalid cloud-config provided:\napt_pipelining: 'bogus'"
-        " is not valid under any of the given schemas\nupdates: Additional"
-        " properties are not allowed ('notnetwork' was unexpected)"
+        "[WARNING]: Invalid cloud-config provided: Please run "
+        "'sudo cloud-init schema --system' to see the schema errors."
     )
     assert warning in log
+    assert "asdfasdf" not in log
+
     result = client.execute("cloud-init status --long")
     if not result.ok:
         raise AssertionError(
--- a/tests/unittests/config/test_schema.py
+++ b/tests/unittests/config/test_schema.py
@@ -304,11 +304,32 @@ class TestValidateCloudConfigSchema:
         assert "cloudinit.config.schema" == module
         assert logging.WARNING == log_level
         assert (
-            "Invalid cloud-config provided:\np1: -1 is not of type 'string'"
+            "Invalid cloud-config provided: \np1: -1 is not of type 'string'"
             == log_msg
         )
 
     @skipUnlessJsonSchema()
+    def test_validateconfig_schema_sensitive(self, caplog):
+        """When log_details=False, ensure details are omitted"""
+        schema = {
+            "properties": {"hashed_password": {"type": "string"}},
+            "additionalProperties": False,
+        }
+        validate_cloudconfig_schema(
+            {"hashed-password": "secret"},
+            schema,
+            strict=False,
+            log_details=False,
+        )
+        [(module, log_level, log_msg)] = caplog.record_tuples
+        assert "cloudinit.config.schema" == module
+        assert logging.WARNING == log_level
+        assert (
+            "Invalid cloud-config provided: Please run 'sudo cloud-init "
+            "schema --system' to see the schema errors." == log_msg
+        )
+
+    @skipUnlessJsonSchema()
     def test_validateconfig_schema_emits_warning_on_missing_jsonschema(
         self, caplog
     ):
