Python File I/O Best Practices
Efficient file handling is crucial for data processing. Follow these best practices for reading, writing, and managing files safely in Python.
Reading Files
Read Entire File
# Always use 'with' so the file is closed automatically, even if an
# exception is raised inside the block.
with open('data.txt', 'r', encoding='utf-8') as f:
    content = f.read()  # Returns entire file as a single string
Read Line by Line (Memory Efficient)
# Iterating over the file object yields one line per iteration, so the
# whole file is never held in memory at once.
with open('large_file.txt', 'r', encoding='utf-8') as f:
    for line in f:  # Reads one line at a time
        process(line.strip())  # strip() removes the newline and surrounding whitespace
Read All Lines into List
# Option 1: keep the line endings
with open('data.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()  # Returns list of lines, each still ending in '\n'

# Option 2: drop the line endings
# Use a fresh handle: after readlines() the file position is at the end,
# so a second read on the same handle would return nothing.
with open('data.txt', 'r', encoding='utf-8') as f:
    lines = f.read().splitlines()  # Removes \n characters
Writing Files
Write Text
# 'w' mode overwrites existing file (and creates it if missing)
with open('output.txt', 'w', encoding='utf-8') as f:
    f.write('Hello, World!\n')  # write() does not add a newline for you
    f.write('Second line\n')

# 'a' mode appends to existing file (and creates it if missing)
with open('log.txt', 'a', encoding='utf-8') as f:
    f.write('New log entry\n')
Write Multiple Lines
lines = ['Line 1\n', 'Line 2\n', 'Line 3\n']

# Option 1: writelines() writes each string as-is (it adds no newlines)
with open('output.txt', 'w', encoding='utf-8') as f:
    f.writelines(lines)

# Option 2: join first, then a single write() call.
# Kept in its own 'with' block: doing both inside one block would write
# the content twice.
with open('output.txt', 'w', encoding='utf-8') as f:
    f.write(''.join(lines))
Handling Encodings
# Specify encoding explicitly (avoid platform issues)
with open('data.txt', 'r', encoding='utf-8') as f:
    content = f.read()

# Handle encoding errors
# errors='ignore' silently drops bytes that are not valid UTF-8, so data can
# be lost without any warning. errors='replace' substitutes U+FFFD instead,
# which keeps the damage visible.
with open('messy_data.txt', 'r', encoding='utf-8', errors='ignore') as f:
    content = f.read()  # Skips invalid characters
Working with Binary Files
# Read binary data (images, PDFs, etc.)
# 'rb' returns bytes and performs no decoding or newline translation.
with open('image.png', 'rb') as f:
    binary_data = f.read()

# Write binary data
with open('copy.png', 'wb') as f:
    f.write(binary_data)
Reading Large Files Efficiently
# Read in chunks to avoid memory issues
def read_in_chunks(file_path, chunk_size=1024, encoding='utf-8'):
    """Yield successive chunks of text from a file.

    Args:
        file_path: Path of the file to read.
        chunk_size: Maximum number of characters (not bytes, since the
            file is opened in text mode) per chunk.
        encoding: Text encoding used to decode the file.

    Yields:
        Strings of at most ``chunk_size`` characters; the last chunk may be
        shorter. Nothing is yielded for an empty file.
    """
    with open(file_path, 'r', encoding=encoding) as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:  # read() returns '' at end of file
                break
            yield chunk
# The generator reads lazily: only one chunk is in memory at a time.
for chunk in read_in_chunks('huge_file.txt'):
    process(chunk)
File Operations with Pathlib
from pathlib import Path
# Modern, object-oriented approach
file = Path('data.txt')

# Read (each call opens the file, reads it fully, and closes it)
content = file.read_text(encoding='utf-8')
binary = file.read_bytes()

# Write (creates the file, or overwrites its existing content)
file.write_text('Hello, World!', encoding='utf-8')

# Check existence
if file.exists():
    print("File found")
CSV Files
import csv
# Reading CSV
# Open with newline='' so the csv module does its own newline handling;
# otherwise newlines embedded in quoted fields can be misinterpreted.
with open('data.csv', 'r', newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)  # First row supplies the dictionary keys
    for row in reader:
        print(row['name'], row['age'])

# Writing CSV
# newline='' here prevents an extra '\r' being written on Windows.
with open('output.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=['name', 'age'])
    writer.writeheader()
    writer.writerow({'name': 'Alice', 'age': 30})
Best Practices Checklist
- Always use 'with' statement for automatic closing
- Specify encoding explicitly (usually 'utf-8')
- Use line-by-line reading for large files
- Use pathlib for path operations
- Handle encoding errors gracefully
Pro Tip: For large files, iterate line-by-line (or read in fixed-size chunks when the data has no line structure) instead of loading the entire file into memory. Use pathlib for modern, cross-platform path handling.
← Back to Python Tips