Skip to content

Convert JSON to Parquet and send to Azure Blob Storage

Load a local JSON file into a PyArrow table, then write it to a Parquet file in Azure Blob Storage without using pandas.

Using PyArrow only, without pandas:
# pip install adlfs pyarrow
# https://arrow.apache.org/docs/python/parquet.html#reading-from-cloud-storage

from os import environ

import pyarrow as pa
import pyarrow.json as pa_json
import pyarrow.parquet as pq
from adlfs import AzureBlobFileSystem


# Local input file. pyarrow.json.read_json parses line-delimited JSON
# (one object per line) -- confirm the input is in that layout.
json_file = "aaa.json"
# The connection string comes from the environment rather than being
# hard-coded; a missing variable raises KeyError right here.
blob_connection_string = environ["AZURE_BLOB_CONNECTION_STRING"]
blob_container_name = "bbb"

# Read the JSON file straight into an Arrow table (no pandas involved).
# pyarrow.Table has no from_json constructor; the reader lives in
# the pyarrow.json submodule.
table = pa_json.read_json(json_file)

# Filesystem object that pq.write_table uses to reach Azure Blob Storage.
abfs = AzureBlobFileSystem(connection_string=blob_connection_string)

# The destination is addressed as "<container>/<path inside container>".
pq.write_table(
    table,
    f"{blob_container_name}/another_folder/output.parquet",
    filesystem=abfs,
)

Comments