Useful Data Tips

Python File I/O Best Practices

⏱️ 30 sec read 🐍 Python

Efficient file handling is crucial for data processing. Follow these best practices for reading, writing, and managing files safely in Python.

Reading Files

Read Entire File

# Always use 'with' for automatic closing
with open('data.txt', 'r') as f:
    content = f.read()  # Returns entire file as string

Read Line by Line (Memory Efficient)

with open('large_file.txt', 'r') as f:
    for line in f:  # Reads one line at a time
        process(line.strip())

Read All Lines into List

with open('data.txt', 'r') as f:
    lines = f.readlines()  # Returns list of lines (each keeps its trailing \n)
    # or
    lines = f.read().splitlines()  # Returns list of lines with \n removed

Writing Files

Write Text

# 'w' mode overwrites existing file
with open('output.txt', 'w') as f:
    f.write('Hello, World!\n')
    f.write('Second line\n')

# 'a' mode appends to existing file
with open('log.txt', 'a') as f:
    f.write('New log entry\n')

Write Multiple Lines

lines = ['Line 1\n', 'Line 2\n', 'Line 3\n']

with open('output.txt', 'w') as f:
    f.writelines(lines)
    # or
    f.write(''.join(lines))

Handling Encodings

# Specify encoding explicitly (avoid platform issues)
with open('data.txt', 'r', encoding='utf-8') as f:
    content = f.read()

# Handle encoding errors
with open('messy_data.txt', 'r', encoding='utf-8', errors='ignore') as f:
    content = f.read()  # Bytes that can't be decoded are silently dropped

Working with Binary Files

# Read binary data (images, PDFs, etc.)
with open('image.png', 'rb') as f:
    binary_data = f.read()

# Write binary data
with open('copy.png', 'wb') as f:
    f.write(binary_data)

Reading Large Files Efficiently

# Read in chunks to avoid memory issues
def read_in_chunks(file_path, chunk_size=1024, encoding='utf-8'):
    """Lazily yield a text file in fixed-size chunks.

    Args:
        file_path: Path of the text file to read.
        chunk_size: Maximum number of characters per chunk (default 1024).
        encoding: Text encoding used to decode the file (default 'utf-8',
            stated explicitly to avoid platform-dependent defaults).

    Yields:
        str: Successive chunks of at most ``chunk_size`` characters;
        the final chunk may be shorter.
    """
    with open(file_path, 'r', encoding=encoding) as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:  # Empty string signals end of file
                break
            yield chunk

for chunk in read_in_chunks('huge_file.txt'):
    process(chunk)

File Operations with Pathlib

from pathlib import Path

# Modern, object-oriented approach
file = Path('data.txt')

# Read
content = file.read_text(encoding='utf-8')
binary = file.read_bytes()

# Write
file.write_text('Hello, World!', encoding='utf-8')

# Check existence
if file.exists():
    print("File found")

CSV Files

import csv

# Reading CSV — open with newline='' so the csv module controls newline
# handling itself (required by the csv docs; without it, fields containing
# embedded newlines can be mis-parsed on some platforms)
with open('data.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row['name'], row['age'])

# Writing CSV — newline='' likewise prevents blank rows on Windows
with open('output.csv', 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=['name', 'age'])
    writer.writeheader()
    writer.writerow({'name': 'Alice', 'age': 30})

Best Practices Checklist

- Always use `with` so files are closed even when an exception occurs
- Specify `encoding='utf-8'` explicitly instead of relying on the platform default
- Use `'rb'`/`'wb'` for binary data (images, PDFs) — never decode it as text
- Pass `newline=''` when opening files for the `csv` module
- Prefer `pathlib.Path` over raw string paths

Pro Tip: For large files, always iterate line-by-line instead of loading the entire file into memory. Use pathlib for modern, cross-platform path handling.

← Back to Python Tips