[{"order_id":"ord_91a3","total":149.99,"status":"shipped"}, {"order_id":"ord_b7f2","total":34.50,"status":"pending"}]
[{"order_id":"ord_91a3","total":149.99,"status":"shipped"}, {"order_id":"ord_b7f2","total":34.50,"status":"pending"}]
[{"order_id":"ord_91a3","total":149.99,"status":"shipped"}, {"order_id":"ord_b7f2","total":34.50,"status":"pending"}]
order_id,total,status
ord_91a3,149.99,shipped
ord_b7f2,34.50,pending
order_id,total,status
ord_91a3,149.99,shipped
ord_b7f2,34.50,pending
order_id,total,status
ord_91a3,149.99,shipped
ord_b7f2,34.50,pending
# Python 3.8+ — minimal json to csv example
import json
import csv

# Sample JSON data — an array of order objects
json_string = """
[
    {"order_id": "ord_91a3", "product": "Wireless Keyboard", "quantity": 2, "unit_price": 74.99},
    {"order_id": "ord_b7f2", "product": "USB-C Hub", "quantity": 1, "unit_price": 34.50},
    {"order_id": "ord_c4e8", "product": "Monitor Stand", "quantity": 3, "unit_price": 29.95}
]
"""

# Parse the JSON text into a list of dicts.
records = json.loads(json_string)

# newline="" lets the csv module control line endings (avoids blank rows on Windows).
with open("orders.csv", "w", newline="", encoding="utf-8") as csvfile:
    # DictWriter maps dict keys to columns; the first record supplies the header order.
    writer = csv.DictWriter(csvfile, fieldnames=records[0].keys())
    writer.writeheader()
    writer.writerows(records)

# orders.csv:
# order_id,product,quantity,unit_price
# ord_91a3,Wireless Keyboard,2,74.99
# ord_b7f2,USB-C Hub,1,34.50
# ord_c4e8,Monitor Stand,3,29.95
# Python 3.8+ — read JSON file, write CSV file
import json
import csv


def metrics_json_to_csv(json_path="server_metrics.json", csv_path="server_metrics.csv"):
    """Read a JSON array of metric records and write selected columns to CSV.

    Only the five listed columns appear in the output, in exactly that order;
    extra keys on a record are silently dropped (extrasaction="ignore").
    """
    with open(json_path, encoding="utf-8") as jf:
        metrics = json.load(jf)  # json.load() for file objects

    # Explicit fieldnames control column order.
    columns = ["timestamp", "hostname", "cpu_percent", "memory_mb", "disk_io_ops"]
    with open(csv_path, "w", newline="", encoding="utf-8") as cf:
        writer = csv.DictWriter(cf, fieldnames=columns, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(metrics)


if __name__ == "__main__":
    metrics_json_to_csv()
# Python 3.8+ — pre-process datetime and Decimal before CSV write
import json
import csv
from datetime import datetime, timezone
from decimal import Decimal
from uuid import UUID

# Simulating parsed API response with Python types
transactions = [
    {
        "txn_id": UUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890"),
        "created_at": datetime(2026, 3, 15, 9, 30, 0, tzinfo=timezone.utc),
        "amount": Decimal("1249.99"),
        "currency": "USD",
        "merchant": "CloudHost Inc.",
    },
    {
        "txn_id": UUID("b2c3d4e5-f6a7-8901-bcde-f12345678901"),
        "created_at": datetime(2026, 3, 15, 14, 12, 0, tzinfo=timezone.utc),
        "amount": Decimal("87.50"),
        "currency": "EUR",
        "merchant": "DataSync GmbH",
    },
]


def prepare_row(record: dict) -> dict:
    """Convert non-string types to CSV-friendly strings.

    UUID -> str, datetime -> ISO 8601, Decimal -> fixed two-decimal string;
    currency and merchant pass through unchanged.
    """
    return {
        "txn_id": str(record["txn_id"]),
        "created_at": record["created_at"].isoformat(),
        "amount": f"{record['amount']:.2f}",
        "currency": record["currency"],
        "merchant": record["merchant"],
    }


with open("transactions.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["txn_id", "created_at", "amount", "currency", "merchant"])
    writer.writeheader()
    for txn in transactions:
        writer.writerow(prepare_row(txn))

# transactions.csv:
# txn_id,created_at,amount,currency,merchant
# a1b2c3d4-e5f6-7890-abcd-ef1234567890,2026-03-15T09:30:00+00:00,1249.99,USD,CloudHost Inc.
# b2c3d4e5-f6a7-8901-bcde-f12345678901,2026-03-15T14:12:00+00:00,87.50,EUR,DataSync GmbH
# Python 3.8+ — pandas read_json then to_csv
import pandas as pd


def inventory_json_to_csv(json_path="warehouse_inventory.json", csv_path="warehouse_inventory.csv"):
    """Read a JSON array into a DataFrame and write it out as CSV.

    pandas infers column types automatically; index=False prevents the
    auto-generated row-number column from appearing in the CSV.
    """
    df = pd.read_json(json_path)
    df.to_csv(csv_path, index=False)


if __name__ == "__main__":
    inventory_json_to_csv()
# Python 3.8+ — flatten nested JSON using json_normalize
import json
import pandas as pd

api_response = """
[
    {
        "order_id": "ord_91a3",
        "placed_at": "2026-03-15T09:30:00Z",
        "customer": {"name": "Sarah Chen", "email": "[email protected]", "tier": "premium"},
        "shipping": {"method": "express", "address": {"city": "Portland", "state": "OR", "zip": "97201"}},
        "total": 299.95
    },
    {
        "order_id": "ord_b7f2",
        "placed_at": "2026-03-15T14:12:00Z",
        "customer": {"name": "James Park", "email": "[email protected]", "tier": "standard"},
        "shipping": {"method": "standard", "address": {"city": "Austin", "state": "TX", "zip": "73301"}},
        "total": 87.50
    }
]
"""

orders = json.loads(api_response)

# json_normalize flattens nested dicts — sep controls the delimiter used to
# join the nesting levels into a single column name.
df = pd.json_normalize(orders, sep="_")
df.to_csv("flat_orders.csv", index=False)

# Resulting columns:
# order_id, placed_at, customer_name, customer_email, customer_tier,
# shipping_method, shipping_address_city, shipping_address_state,
# shipping_address_zip, total
# Python 3.8+ — to_csv with common parameter overrides
import pandas as pd


def export_telemetry(json_path="telemetry_events.json", tsv_path="telemetry_events.tsv"):
    """Read telemetry events from JSON and export them as TSV.

    Demonstrates the common to_csv overrides: tab separator, explicit
    encoding, NULL placeholder for missing values, and an explicit column
    subset/order.  Returns the default CSV rendering as a string.
    """
    df = pd.read_json(json_path)

    # TSV output with explicit encoding and missing value handling
    df.to_csv(
        tsv_path,
        sep="\t",
        index=False,
        encoding="utf-8",
        na_rep="NULL",
        columns=["event_id", "timestamp", "source", "severity", "message"],
    )

    # Write to stdout for piping in shell scripts
    print(df.to_csv(index=False))

    # Return as string (no file written)
    csv_string = df.to_csv(index=False)
    print(len(csv_string), "characters")
    return csv_string


if __name__ == "__main__":
    export_telemetry()
# Python 3.8+ — convert JSON file to CSV with error handling
import json
import csv
import sys


def json_file_to_csv(input_path: str, output_path: str) -> int:
    """Convert a JSON file containing an array of objects to CSV.

    Returns the number of rows written (0 on any error, with a message
    printed to stderr).
    """
    try:
        with open(input_path, encoding="utf-8") as jf:
            data = json.load(jf)
    except FileNotFoundError:
        print(f"Error: {input_path} not found", file=sys.stderr)
        return 0
    except json.JSONDecodeError as exc:
        print(f"Error: invalid JSON in {input_path}: {exc.msg} at line {exc.lineno}", file=sys.stderr)
        return 0

    if not isinstance(data, list) or not data:
        print(f"Error: expected a non-empty JSON array in {input_path}", file=sys.stderr)
        return 0

    # Collect all unique keys across all records — handles inconsistent schemas
    # while preserving first-seen key order.
    all_keys: list = []
    seen: set = set()
    for record in data:
        for key in record:
            if key not in seen:
                all_keys.append(key)
                seen.add(key)

    # restval="" fills missing keys; extrasaction="ignore" is belt-and-braces
    # since all_keys already covers every key seen above.
    with open(output_path, "w", newline="", encoding="utf-8") as cf:
        writer = csv.DictWriter(cf, fieldnames=all_keys, restval="", extrasaction="ignore")
        writer.writeheader()
        writer.writerows(data)
    return len(data)


if __name__ == "__main__":
    rows = json_file_to_csv("deploy_logs.json", "deploy_logs.csv")
    print(f"Wrote {rows} rows to deploy_logs.csv")
# Python 3.8+ — fetch JSON from API and save as CSV
import json
import csv
import urllib.request
import urllib.error


def api_response_to_csv(url: str, output_path: str) -> int:
    """Fetch JSON from a REST API endpoint and write it as CSV.

    Expects the endpoint to return a non-empty JSON array of flat objects;
    the first record's keys become the CSV columns.  Returns the number of
    rows written (0 on any network, status, or parse error).
    """
    try:
        req = urllib.request.Request(url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(req, timeout=30) as resp:
            if resp.status != 200:
                print(f"Error: API returned status {resp.status}")
                return 0
            body = resp.read().decode("utf-8")
    except urllib.error.URLError as exc:
        print(f"Error: could not reach {url}: {exc.reason}")
        return 0

    try:
        records = json.loads(body)
    except json.JSONDecodeError as exc:
        print(f"Error: API returned invalid JSON: {exc.msg}")
        return 0

    if not isinstance(records, list) or not records:
        print("Error: expected a non-empty JSON array from the API")
        return 0

    with open(output_path, "w", newline="", encoding="utf-8") as cf:
        writer = csv.DictWriter(cf, fieldnames=records[0].keys())
        writer.writeheader()
        writer.writerows(records)
    return len(records)
# Python one-liner: reads JSON from stdin, writes CSV to stdout
# (redirect the file in directly — no need to pipe through cat)
python3 -c "
import json, csv, sys
data = json.load(sys.stdin)
w = csv.DictWriter(sys.stdout, fieldnames=data[0].keys())
w.writeheader()
w.writerows(data)
" < orders.json

# Save output to a file
python3 -c "
import json, csv, sys
data = json.load(sys.stdin)
w = csv.DictWriter(sys.stdout, fieldnames=data[0].keys())
w.writeheader()
w.writerows(data)
" < orders.json > orders.csv
# Using jq + csvkit for complex transformations
# Install csvkit: pip install csvkit

# jq flattens and selects fields, in2csv handles the CSV formatting
# (jq reads the file directly — no cat needed)
jq '[.[] | {id: .order_id, customer: .customer.name, total}]' api_response.json | in2csv -f json > orders.csv

# Miller (mlr) is another option for JSON-to-CSV
# (--ijson/--ocsv select the input and output formats; --j2c is the short form)
mlr --ijson --ocsv cat orders.json > orders.csv
pip install pyarrow
# Python 3.8+ — pandas with pyarrow for faster CSV writing
# NOTE(review): read_json(engine="pyarrow") requires pandas 2.0+ and the
# pyarrow package — confirm against the project's pinned versions.
import pandas as pd


def export_sensor_readings(json_path="sensor_readings.json",
                           csv_path="sensor_readings.csv",
                           parquet_path="sensor_readings.parquet"):
    """Read JSON with the pyarrow engine and write CSV and Parquet copies."""
    # pyarrow engine: faster parsing for large files
    df = pd.read_json(json_path, engine="pyarrow")

    # to_csv doesn't have an engine parameter, but the DataFrame operations
    # between read and write benefit from pyarrow's columnar layout
    df.to_csv(csv_path, index=False)

    # For truly large exports, consider writing to Parquet instead of CSV
    # — binary format, 5-10x smaller, preserves types
    df.to_parquet(parquet_path, engine="pyarrow")


if __name__ == "__main__":
    export_sensor_readings()
pip install ijson
# Python 3.8+ — stream large JSON array to CSV with constant memory
import ijson
import csv


def stream_json_to_csv(json_path: str, csv_path: str) -> int:
    """Convert a large JSON array to CSV without loading it all into memory.

    The first element's keys define the CSV columns.  Returns the number of
    records written (0 for an empty array).
    """
    with open(json_path, "rb") as jf, open(csv_path, "w", newline="", encoding="utf-8") as cf:
        # ijson.items yields each element of the top-level array one at a time
        records = ijson.items(jf, "item")
        first_record = next(records, None)
        if first_record is None:
            # Empty array — nothing to write (bare next() would raise StopIteration)
            return 0
        writer = csv.DictWriter(cf, fieldnames=list(first_record.keys()))
        writer.writeheader()
        writer.writerow(first_record)
        count = 1
        for record in records:
            writer.writerow(record)
            count += 1
    return count


if __name__ == "__main__":
    rows = stream_json_to_csv("clickstream_2026_03.json", "clickstream_2026_03.csv")
    print(f"Streamed {rows} records to CSV")
# Python 3.8+ — convert NDJSON to CSV line by line
import json
import csv


def ndjson_to_csv(ndjson_path: str, csv_path: str) -> int:
    """Convert a newline-delimited JSON file to CSV, one line at a time.

    The first parseable record defines the CSV columns.  Blank and malformed
    lines are skipped throughout (previously a blank or malformed FIRST line
    raised, while later ones were skipped).  Returns the number of rows
    written (0 for an empty or entirely malformed file).
    """
    with open(ndjson_path, encoding="utf-8") as nf:
        # Find the first parseable record — it defines the header.
        first_record = None
        for line in nf:
            line = line.strip()
            if not line:
                continue
            try:
                first_record = json.loads(line)
                break
            except json.JSONDecodeError:
                continue
        if first_record is None:
            return 0

        fieldnames = list(first_record.keys())
        with open(csv_path, "w", newline="", encoding="utf-8") as cf:
            writer = csv.DictWriter(cf, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerow(first_record)
            count = 1
            # Continue from where the header scan stopped — nf is still open.
            for line in nf:
                line = line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    continue  # skip malformed lines
                writer.writerow(record)
                count += 1
    return count


if __name__ == "__main__":
    rows = ndjson_to_csv("access_log.ndjson", "access_log.csv")
    print(f"Converted {rows} log entries to CSV")
# Before — blank rows on Windows
with open("output.csv", "w") as f:
    writer = csv.DictWriter(f, fieldnames=columns)
    writer.writeheader()
    writer.writerows(data)

# After — clean output on all platforms
# newline="" stops Python's universal-newline translation from doubling \r\n.
with open("output.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=columns)
    writer.writeheader()
    writer.writerows(data)
# Before — blank rows on Windows
with open("output.csv", "w") as f:
    writer = csv.DictWriter(f, fieldnames=columns)
    writer.writeheader()
    writer.writerows(data)

# After — clean output on all platforms
# newline="" stops Python's universal-newline translation from doubling \r\n.
with open("output.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=columns)
    writer.writeheader()
    writer.writerows(data)
# Before — blank rows on Windows
with open("output.csv", "w") as f:
    writer = csv.DictWriter(f, fieldnames=columns)
    writer.writeheader()
    writer.writerows(data)

# After — clean output on all platforms
# newline="" stops Python's universal-newline translation from doubling \r\n.
with open("output.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=columns)
    writer.writeheader()
    writer.writerows(data)
# Before
df = pd.read_json("events.json")
df.to_csv("events.csv")
# CSV gets an extra unnamed column: ,event_id,timestamp,...

# After
df = pd.read_json("events.json")
df.to_csv("events.csv", index=False)  # index=False drops the row-number index
# Clean CSV: event_id,timestamp,...
# Before
df = pd.read_json("events.json")
df.to_csv("events.csv")
# CSV gets an extra unnamed column: ,event_id,timestamp,...

# After
df = pd.read_json("events.json")
df.to_csv("events.csv", index=False)  # index=False drops the row-number index
# Clean CSV: event_id,timestamp,...
# Before
df = pd.read_json("events.json")
df.to_csv("events.csv")
# CSV gets an extra unnamed column: ,event_id,timestamp,...

# After
df = pd.read_json("events.json")
df.to_csv("events.csv", index=False)  # index=False drops the row-number index
# Clean CSV: event_id,timestamp,...
# Before
records = json.load(f)
writer = csv.DictWriter(out, fieldnames=records[0].keys())
# Misses "discount" field that only appears in records[2]

# After
records = json.load(f)
# Union of keys across all records, preserving first-seen order.
all_keys = list(dict.fromkeys(k for r in records for k in r))
# restval="" fills columns a record lacks instead of raising.
writer = csv.DictWriter(out, fieldnames=all_keys, restval="")
# Every key from every record is included as a column
# Before
records = json.load(f)
writer = csv.DictWriter(out, fieldnames=records[0].keys())
# Misses "discount" field that only appears in records[2]

# After
records = json.load(f)
# Union of keys across all records, preserving first-seen order.
all_keys = list(dict.fromkeys(k for r in records for k in r))
# restval="" fills columns a record lacks instead of raising.
writer = csv.DictWriter(out, fieldnames=all_keys, restval="")
# Every key from every record is included as a column
# Before
records = json.load(f)
writer = csv.DictWriter(out, fieldnames=records[0].keys())
# Misses "discount" field that only appears in records[2]

# After
records = json.load(f)
# Union of keys across all records, preserving first-seen order.
all_keys = list(dict.fromkeys(k for r in records for k in r))
# restval="" fills columns a record lacks instead of raising.
writer = csv.DictWriter(out, fieldnames=all_keys, restval="")
# Every key from every record is included as a column
# Before
records = [{"id": "evt_1", "meta": {"source": "web", "region": "us-west"}}]
writer = csv.DictWriter(f, fieldnames=["id", "meta"])
writer.writerows(records)
# meta column contains: {'source': 'web', 'region': 'us-west'}

# After
import pandas as pd

records = [{"id": "evt_1", "meta": {"source": "web", "region": "us-west"}}]
# json_normalize flattens nested dicts; sep joins parent/child key names.
df = pd.json_normalize(records, sep="_")
df.to_csv("events.csv", index=False)
# Columns: id, meta_source, meta_region
# Before
records = [{"id": "evt_1", "meta": {"source": "web", "region": "us-west"}}]
writer = csv.DictWriter(f, fieldnames=["id", "meta"])
writer.writerows(records)
# meta column contains: {'source': 'web', 'region': 'us-west'}

# After
import pandas as pd

records = [{"id": "evt_1", "meta": {"source": "web", "region": "us-west"}}]
# json_normalize flattens nested dicts; sep joins parent/child key names.
df = pd.json_normalize(records, sep="_")
df.to_csv("events.csv", index=False)
# Columns: id, meta_source, meta_region
# Before
records = [{"id": "evt_1", "meta": {"source": "web", "region": "us-west"}}]
writer = csv.DictWriter(f, fieldnames=["id", "meta"])
writer.writerows(records)
# meta column contains: {'source': 'web', 'region': 'us-west'}

# After
import pandas as pd

records = [{"id": "evt_1", "meta": {"source": "web", "region": "us-west"}}]
# json_normalize flattens nested dicts; sep joins parent/child key names.
df = pd.json_normalize(records, sep="_")
df.to_csv("events.csv", index=False)
# Columns: id, meta_source, meta_region
import json
import csv

# Read the whole JSON array into memory, then dump it as CSV in one pass.
with open("orders.json") as src:
    records = json.load(src)

with open("orders.csv", "w", newline="", encoding="utf-8") as dst:
    writer = csv.DictWriter(dst, fieldnames=records[0].keys())
    writer.writeheader()
    writer.writerows(records)
import json
import csv

# Load the JSON array first, then write every record as one CSV row.
with open("orders.json") as json_file:
    records = json.load(json_file)

with open("orders.csv", "w", newline="", encoding="utf-8") as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=records[0].keys())
    writer.writeheader()
    writer.writerows(records)
import json
import csv

# Parse the input file, then emit header + rows via DictWriter.
with open("orders.json") as fin:
    records = json.load(fin)

with open("orders.csv", "w", newline="", encoding="utf-8") as fout:
    writer = csv.DictWriter(fout, fieldnames=records[0].keys())
    writer.writeheader()
    writer.writerows(records)
import pandas as pd

# One order whose "customer" value is itself a nested object.
nested_data = [
    {"id": "ord_91a3", "customer": {"name": "Sarah Chen", "email": "[email protected]"}},
]

# Flatten the nested keys into underscore-joined column names.
df = pd.json_normalize(nested_data, sep="_")
# Columns: id, customer_name, customer_email
df.to_csv("flat_orders.csv", index=False)
import pandas as pd

# Sample record containing a nested "customer" mapping.
nested_data = [
    {"id": "ord_91a3", "customer": {"name": "Sarah Chen", "email": "[email protected]"}},
]

# json_normalize promotes nested keys to top-level, "_"-joined columns.
df = pd.json_normalize(nested_data, sep="_")
# Columns: id, customer_name, customer_email
df.to_csv("flat_orders.csv", index=False)
import pandas as pd

# Demo input: the "customer" field is a nested dict.
nested_data = [
    {"id": "ord_91a3", "customer": {"name": "Sarah Chen", "email": "[email protected]"}},
]

# Flatten before export so CSV cells never hold dict reprs.
df = pd.json_normalize(nested_data, sep="_")
# Columns: id, customer_name, customer_email
df.to_csv("flat_orders.csv", index=False)
# Wrong — blank rows on Windows
with open("output.csv", "w") as f:
    writer = csv.writer(f)

# Correct — newline="" prevents double \r
with open("output.csv", "w", newline="") as f:
    writer = csv.writer(f)
# Wrong — blank rows on Windows
with open("output.csv", "w") as f:
    writer = csv.writer(f)

# Correct — newline="" prevents double \r
with open("output.csv", "w", newline="") as f:
    writer = csv.writer(f)
# Wrong — blank rows on Windows
with open("output.csv", "w") as f:
    writer = csv.writer(f)

# Correct — newline="" prevents double \r
with open("output.csv", "w", newline="") as f:
    writer = csv.writer(f)
import csv

# Rows to add to an existing CSV whose header is already written.
new_records = [
    {"order_id": "ord_f4c1", "total": 89.50, "status": "shipped"},
]

# Mode "a" appends; fieldnames must match the existing header order.
with open("orders.csv", "a", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["order_id", "total", "status"])
    writer.writerows(new_records)
import csv

# New rows destined for an already-populated CSV file.
new_records = [
    {"order_id": "ord_f4c1", "total": 89.50, "status": "shipped"},
]

# Append mode skips the header; column order mirrors the existing file.
with open("orders.csv", "a", newline="", encoding="utf-8") as out_file:
    csv.DictWriter(out_file, fieldnames=["order_id", "total", "status"]).writerows(new_records)
import csv

# Records to tack onto the end of an existing orders CSV.
new_records = [
    {"order_id": "ord_f4c1", "total": 89.50, "status": "shipped"},
]

columns = ["order_id", "total", "status"]  # must match the file's header
with open("orders.csv", "a", newline="", encoding="utf-8") as handle:
    appender = csv.DictWriter(handle, fieldnames=columns)
    appender.writerows(new_records)
# Fast for files that fit in memory
import pandas as pd

df = pd.read_json("large_dataset.json")
df.to_csv("large_dataset.csv", index=False)

# Streaming for files that don't fit in memory
import ijson, csv

with open("huge.json", "rb") as jf, open("huge.csv", "w", newline="") as cf:
    records = ijson.items(jf, "item")  # one array element at a time
    first = next(records)
    # The first record's keys define the column order.
    writer = csv.DictWriter(cf, fieldnames=first.keys())
    writer.writeheader()
    writer.writerow(first)
    for rec in records:
        writer.writerow(rec)
# Fast for files that fit in memory
import pandas as pd

df = pd.read_json("large_dataset.json")
df.to_csv("large_dataset.csv", index=False)

# Streaming for files that don't fit in memory
import ijson, csv

with open("huge.json", "rb") as jf, open("huge.csv", "w", newline="") as cf:
    item_stream = ijson.items(jf, "item")  # lazily yields array elements
    head = next(item_stream)
    writer = csv.DictWriter(cf, fieldnames=head.keys())
    writer.writeheader()
    writer.writerow(head)
    for item in item_stream:
        writer.writerow(item)
# Fast for files that fit in memory
import pandas as pd

df = pd.read_json("large_dataset.json")
df.to_csv("large_dataset.csv", index=False)

# Streaming for files that don't fit in memory
import ijson, csv

with open("huge.json", "rb") as jf, open("huge.csv", "w", newline="") as cf:
    stream = ijson.items(jf, "item")  # constant-memory iteration
    leading = next(stream)
    out = csv.DictWriter(cf, fieldnames=leading.keys())
    out.writeheader()
    out.writerow(leading)
    for element in stream:
        out.writerow(element)
import csv
import sys
import json

# Parse a small inline JSON array and stream it to stdout as CSV.
payload = '[{"host":"web-1","cpu":72.3},{"host":"web-2","cpu":45.1}]'
data = json.loads(payload)

writer = csv.DictWriter(sys.stdout, fieldnames=data[0].keys())
writer.writeheader()
writer.writerows(data)
# host,cpu
# web-1,72.3
# web-2,45.1
import csv
import sys
import json

# Decode the JSON array, then write CSV straight to standard output.
raw = '[{"host":"web-1","cpu":72.3},{"host":"web-2","cpu":45.1}]'
data = json.loads(raw)

out = csv.DictWriter(sys.stdout, fieldnames=data[0].keys())
out.writeheader()
out.writerows(data)
# host,cpu
# web-1,72.3
# web-2,45.1
import csv
import sys
import json

# CSV output need not go to a file — any writable text stream works.
data = json.loads('[{"host":"web-1","cpu":72.3},{"host":"web-2","cpu":45.1}]')

stdout_writer = csv.DictWriter(sys.stdout, fieldnames=data[0].keys())
stdout_writer.writeheader()
stdout_writer.writerows(data)
# host,cpu
# web-1,72.3
# web-2,45.1

- csv.DictWriter converts a list of dicts to CSV with zero dependencies — use json.load() to parse, then writeheader() + writerows().
- Always open CSV files with newline="" — on Windows this is what prevents blank rows from appearing between data rows.
- pd.json_normalize() flattens nested JSON into a flat DataFrame before calling to_csv() — handles multi-level nesting automatically.
- Pass index=False to DataFrame.to_csv() — without it, pandas writes an unwanted row-number column.
- For files over 500 MB, use ijson for streaming JSON parsing combined with csv.DictWriter for constant memory usage.