Skip to content

Commit

Permalink
feat: we can pass extra json.dumps args when using load_table_from_json
Browse files Browse the repository at this point in the history
  • Loading branch information
HCA97 committed Aug 7, 2023
1 parent 0686848 commit da90fcb
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 2 deletions.
12 changes: 10 additions & 2 deletions google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2759,6 +2759,7 @@ def load_table_from_json(
project: Optional[str] = None,
job_config: Optional[LoadJobConfig] = None,
timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
json_dumps_kwargs: Optional[Dict[str, Any]] = None,
) -> job.LoadJob:
"""Upload the contents of a table from a JSON string or dict.
Expand Down Expand Up @@ -2811,7 +2812,8 @@ def load_table_from_json(
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
json_dumps_kwargs:
Extra keyword arguments for ``json.dumps``. Usefull when you want to parse datetime objects.
Returns:
google.cloud.bigquery.job.LoadJob: A new load job.
Expand Down Expand Up @@ -2842,7 +2844,13 @@ def load_table_from_json(

destination = _table_arg_to_table_ref(destination, default_project=self.project)

data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows)
if json_dumps_kwargs is None:
json_dumps_kwargs = {}

data_str = "\n".join(
json.dumps(item, ensure_ascii=False, **json_dumps_kwargs)
for item in json_rows
)
encoded_str = data_str.encode()
data_file = io.BytesIO(encoded_str)
return self.load_table_from_file(
Expand Down
59 changes: 59 additions & 0 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8665,6 +8665,65 @@ def test_load_table_from_dataframe_with_csv_source_format(self):
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
assert sent_config.source_format == job.SourceFormat.CSV

def test_load_table_from_json_basic_use_with_json_dumps_kwargs(self):
    """``json_dumps_kwargs`` is honored and the load is forwarded intact.

    Passes a ``default=`` serializer through ``json_dumps_kwargs`` so that
    rows containing ``datetime.date`` values can be serialized, then checks
    that ``load_table_from_file`` receives the expected arguments and an
    autodetect NDJSON job config.
    """
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery import job

    def json_serial(obj):
        """Serialize date/datetime objects to ISO-8601 strings.

        Ref: https://stackoverflow.com/a/22238613
        """
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()
        raise TypeError("Type %s not serializable" % type(obj))

    client = self._make_client()

    # Rows deliberately contain a non-JSON-serializable value (date) so the
    # call only succeeds if the custom serializer is actually applied.
    people = [
        ("One", 11, datetime.date(2008, 9, 10), False),
        ("Two", 22, datetime.date(1997, 8, 9), True),
    ]
    json_rows = [
        {"name": name, "age": age, "birthday": birthday, "adult": adult}
        for name, age, birthday, adult in people
    ]

    with mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    ) as load_table_from_file:
        client.load_table_from_json(
            json_rows, self.TABLE_REF, json_dumps_kwargs={"default": json_serial}
        )

    load_table_from_file.assert_called_once_with(
        client,
        mock.ANY,
        self.TABLE_REF,
        size=mock.ANY,
        num_retries=_DEFAULT_NUM_RETRIES,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=client.location,
        project=client.project,
        job_config=mock.ANY,
        timeout=DEFAULT_TIMEOUT,
    )

    # Inspect the job config that was actually sent.
    sent_config = load_table_from_file.call_args.kwargs["job_config"]
    assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
    assert sent_config.schema is None
    assert sent_config.autodetect

def test_load_table_from_json_basic_use(self):
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
Expand Down

0 comments on commit da90fcb

Please sign in to comment.