Every developer has tasks they repeat dozens of times daily: renaming files, moving data between folders, downloading reports, sending emails, updating spreadsheets. These tasks take minutes each but add up to hours every week. Python automation eliminates this drudgery.
This guide shows you how to build practical automation scripts. You will learn to automate file operations, scrape websites, process data, schedule tasks, and combine everything into tools you can use every day.
Why Automate with Python
Manual tasks have hidden costs. A task taking 2 minutes, done 20 times a week, consumes 40 minutes weekly. Over a year, that is roughly 35 hours, nearly a full work week. Python automation turns these minutes into seconds.
The benefits compound. An automation script runs the same way every time, eliminating human error. You can run scripts overnight while you sleep. You can share scripts with colleagues, spreading the time savings.
File Automation Scripts
Batch File Operations
Organize messy directories automatically:
import os
import shutil
from pathlib import Path
from datetime import datetime
def organize_downloads(download_dir, rules=None):
    """Organize files in a downloads folder by type and date.

    Each top-level file is moved into ``<category>/<YYYY-MM-DD>/`` inside
    *download_dir*. Name collisions are resolved with a ``_1``, ``_2``, ...
    suffix instead of overwriting.

    Args:
        download_dir: Directory whose top-level files should be organized.
        rules: Optional mapping of category name -> list of extensions
            (lowercase, with leading dot). Files whose extension matches no
            rule go to 'others'. Defaults to a built-in set of categories.
    """
    if rules is None:
        rules = {
            'images': ['.jpg', '.jpeg', '.png', '.gif', '.webp'],
            'documents': ['.pdf', '.doc', '.docx', '.txt', '.xlsx'],
            'archives': ['.zip', '.tar', '.gz', '.7z'],
            'scripts': ['.py', '.js', '.sh', '.sql'],
        }
    downloads = Path(download_dir)
    today = datetime.now().strftime('%Y-%m-%d')
    # Snapshot the listing before moving anything: we create subfolders and
    # move files inside this same directory, and mutating a directory while
    # lazily iterating iterdir() can skip or revisit entries.
    for file in list(downloads.iterdir()):
        # Skip directories and hidden/dot files.
        if not file.is_file() or file.name.startswith('.'):
            continue
        ext = file.suffix.lower()
        # First matching rule wins; unmatched extensions go to 'others'.
        category = next(
            (cat for cat, extensions in rules.items() if ext in extensions),
            'others',
        )
        # Create the dated destination folder.
        dest_dir = downloads / category / today
        dest_dir.mkdir(parents=True, exist_ok=True)
        new_path = dest_dir / file.name
        if new_path.exists():
            # Avoid overwriting: append an incrementing counter to the stem.
            stem = new_path.stem
            counter = 1
            while new_path.exists():
                new_path = dest_dir / f"{stem}_{counter}{new_path.suffix}"
                counter += 1
        shutil.move(str(file), str(new_path))
        print(f"Moved: {file.name} -> {category}/")
# Usage
# Example: organize the current user's Downloads folder in place.
# NOTE: this runs immediately on import/execution and moves real files.
organize_downloads(str(Path.home() / "Downloads"))
File Search and Processing
Find and modify files across directories:
import os
from pathlib import Path
import re
def find_and_replace(root_dir, pattern, replacement, file_types=None):
    """Find and replace text in files under *root_dir*.

    Recursively walks *root_dir* and applies ``re.subn(pattern, replacement)``
    to every file whose suffix is in *file_types*. The suffix check is
    case-insensitive (the original skipped e.g. '.TXT' while the default
    list is lowercase). Files that cannot be decoded as UTF-8 or opened are
    skipped silently.

    Args:
        root_dir: Directory to walk recursively.
        pattern: Regular expression to search for.
        replacement: Replacement text (may use regex backreferences).
        file_types: Optional list of extensions to process; defaults to
            common text/code suffixes.

    Returns:
        List of (file_path, replacement_count) tuples for files that changed.
    """
    if file_types is None:
        file_types = ['.py', '.txt', '.md', '.json', '.yaml', '.yml']
    # Normalize once so '.TXT' and '.txt' both match.
    allowed = {ft.lower() for ft in file_types}
    root = Path(root_dir)
    changes = []
    for file in root.rglob('*'):
        if not (file.is_file() and file.suffix.lower() in allowed):
            continue
        try:
            content = file.read_text(encoding='utf-8')
            new_content, count = re.subn(pattern, replacement, content)
            if count > 0:
                # Only rewrite files that actually changed.
                file.write_text(new_content, encoding='utf-8')
                changes.append((str(file), count))
                print(f"Updated: {file.name} ({count} replacements)")
        except (UnicodeDecodeError, PermissionError):
            # Binary or unreadable file: skip it.
            continue
    return changes
Web Scraping Automation
Automate data collection from websites:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import json
import time
class WebsiteMonitor:
    """Monitor websites for changes and send notifications.

    Keeps the previously extracted content per monitor name and posts a
    JSON change summary to an optional webhook when content differs.
    """

    def __init__(self, notification_webhook=None):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (compatible; AutomationBot/1.0)'
        })
        self.notification_webhook = notification_webhook
        # Maps monitor name -> last extracted content dict.
        self.previous_content = {}

    def fetch_page(self, url):
        """Return the page body for *url*, or None on any request error."""
        try:
            resp = self.session.get(url, timeout=10)
            resp.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching {url}: {e}")
            return None
        return resp.text

    def extract_content(self, html, selectors):
        """Extract text for each named CSS selector; None when not found."""
        soup = BeautifulSoup(html, 'html.parser')

        def first_text(selector):
            node = soup.select_one(selector)
            return node.text.strip() if node else None

        return {name: first_text(selector) for name, selector in selectors.items()}

    def check_for_changes(self, url, selectors, name):
        """Fetch *url*, compare against the last snapshot, notify on diff.

        Returns the freshly extracted content dict, or None if the fetch
        failed.
        """
        html = self.fetch_page(url)
        if html is None:
            return None
        current = self.extract_content(html, selectors)
        previous = self.previous_content.get(name)
        # Only notify after we have a baseline snapshot for this name.
        if previous is not None and current != previous:
            self.notify(name, current, previous)
        self.previous_content[name] = current
        return current

    def notify(self, name, new_content, old_content):
        """Post a per-key change summary to the configured webhook."""
        changes = {
            key: {'from': old_content.get(key), 'to': value}
            for key, value in new_content.items()
            if value != old_content.get(key)
        }
        if changes and self.notification_webhook:
            self.session.post(self.notification_webhook, json={
                'name': name,
                'changes': changes,
                'timestamp': datetime.now().isoformat()
            })
            print(f"Changes detected in {name}: {list(changes.keys())}")
API-Based Data Collection
Automate data collection from APIs:
import requests
import json
from datetime import datetime
from pathlib import Path
class APICollector:
    """Collect and store data from REST APIs.

    Fixes versus the original: ``save_to_json`` actually uses its
    ``filename`` argument (it previously wrote a literal garbled prefix),
    ``time`` is imported where used (this snippet never imported it, so
    ``time.sleep`` raised NameError), the caller's ``params`` dict is no
    longer mutated, and pagination terminates when the response carries no
    pagination info (previously an infinite loop unless max_pages was set).
    """

    def __init__(self, base_url, output_dir='data'):
        self.base_url = base_url
        self.session = requests.Session()
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(exist_ok=True)

    def fetch_all_pages(self, endpoint, params=None, max_pages=None):
        """Fetch all pages from a paginated API.

        Args:
            endpoint: Path appended to ``base_url``.
            params: Optional base query parameters; copied, never mutated.
            max_pages: Optional hard limit on the number of pages fetched.

        Returns:
            List of records accumulated across pages. Stops when the
            response's ``pagination.has_more`` is falsy, when no pagination
            info is present, on a request error, or at *max_pages*.
        """
        import time  # local import: this script section lacks a top-level `import time`

        all_data = []
        query = dict(params) if params else {}  # copy: don't mutate the caller's dict
        page = 1
        while True:
            if max_pages and page > max_pages:
                break
            query['page'] = page
            url = f"{self.base_url}/{endpoint}"
            try:
                response = self.session.get(url, params=query, timeout=30)
                response.raise_for_status()
                payload = response.json()
            except requests.RequestException as e:
                print(f"Error on page {page}: {e}")
                break
            # Accept either {'data': [...]} envelopes or bare responses.
            if isinstance(payload, dict) and 'data' in payload:
                all_data.extend(payload['data'])
            else:
                all_data.append(payload)
            pagination = payload.get('pagination') if isinstance(payload, dict) else None
            if not (pagination and pagination.get('has_more', False)):
                # No more pages -- or no pagination info at all, in which
                # case looping further would never terminate.
                break
            page += 1
            time.sleep(0.5)  # simple rate limiting between requests

    def save_to_json(self, data, filename):
        """Save *data* to ``<output_dir>/<filename>_<timestamp>.json``.

        Returns the path of the written file.
        """
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        # Bug fix: the original ignored `filename` and wrote a garbled
        # literal prefix instead of f"{filename}_...".
        filepath = self.output_dir / f"{filename}_{timestamp}.json"
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        print(f"Saved {len(data)} records to {filepath}")
        return filepath
Data Processing Automation
Automate Excel and CSV processing:
import pandas as pd
from pathlib import Path
from datetime import datetime
import glob
class DataProcessor:
    """Automate data processing and transformation.

    Reads CSVs from an input directory, applies user-supplied transforms,
    and writes results to an output directory (created if missing).
    """

    def __init__(self, input_dir, output_dir):
        self.input_dir = Path(input_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(exist_ok=True)

    def process_csv_files(self, processing_func, output_prefix=''):
        """Run *processing_func* over each input CSV; save DataFrame results.

        Returns the list of output file paths written.
        """
        written = []
        for source in self.input_dir.glob('*.csv'):
            print(f"Processing: {source.name}")
            try:
                transformed = processing_func(pd.read_csv(source))
                # Only persist results that are DataFrames; anything else
                # (e.g. a summary dict) is silently ignored.
                if isinstance(transformed, pd.DataFrame):
                    target = self.output_dir / f"{output_prefix}_{source.stem}.csv"
                    transformed.to_csv(target, index=False)
                    written.append(target)
                    print(f"Saved: {target.name}")
            except Exception as e:
                # Best-effort batch processing: report and move on.
                print(f"Error processing {source.name}: {e}")
        return written

    def merge_and_summarize(self, glob_pattern, group_by_col, aggregations):
        """Concatenate all matching files and return a grouped summary.

        Returns None (with a message) when no file matches *glob_pattern*.
        """
        matched = glob.glob(str(self.input_dir / glob_pattern))
        if not matched:
            print("No files found matching pattern")
            return None
        combined = pd.concat(map(pd.read_csv, matched), ignore_index=True)
        summary = combined.groupby(group_by_col).agg(aggregations).reset_index()
        # Attach provenance metadata to every summary row.
        summary['processed_at'] = datetime.now().isoformat()
        summary['source_files'] = len(matched)
        return summary
# Example processing function
def clean_sales_data(df):
    """Clean and transform sales data.

    Normalizes column names, parses any column whose name contains 'date',
    zero-fills numeric NaNs, and adds year_month/quarter columns derived
    from the primary date column.

    Fix: the original detected date columns generically but then hard-coded
    ``df['date']`` for the computed columns, raising KeyError whenever the
    date column had any other name. We now prefer a column named 'date' and
    fall back to the first detected date column; when no date column exists
    the computed columns are skipped instead of crashing.

    Args:
        df: Raw sales DataFrame (not modified; a copy is returned).

    Returns:
        A cleaned copy of *df*.
    """
    df = df.copy()
    # Standardize column names: lowercase, trimmed, snake_case.
    df.columns = [c.lower().strip().replace(' ', '_') for c in df.columns]
    # Parse any date-like columns; unparseable values become NaT.
    date_cols = [c for c in df.columns if 'date' in c.lower()]
    for col in date_cols:
        df[col] = pd.to_datetime(df[col], errors='coerce')
    # Zero-fill missing numeric values.
    numeric_cols = df.select_dtypes(include=['number']).columns
    df[numeric_cols] = df[numeric_cols].fillna(0)
    # Add computed period columns from the primary date column.
    primary = 'date' if 'date' in df.columns else (date_cols[0] if date_cols else None)
    if primary is not None:
        df['year_month'] = df[primary].dt.to_period('M')
        df['quarter'] = df[primary].dt.quarter
    return df
Task Scheduling
Run automation scripts on a schedule:
Using Schedule Library
import schedule
import time
import os
def job_daily_backup():
    """Daily backup task: mirror /source into /destination/ with rsync."""
    print("Starting backup...")
    # Idiom/security fix: run rsync via an argument list instead of
    # os.system with a shell string -- no shell interpretation involved.
    import subprocess
    subprocess.run(["rsync", "-av", "/source", "/destination/"])
    print("Backup complete")
def job_weekly_report():
    """Weekly report task (stub: logs start/finish around the real work)."""
    print("Generating weekly report...")
    # Report generation code
    print("Report sent")
def job_hourly_check():
    """Hourly system check: print a timestamped heartbeat."""
    # Plain import instead of the original __import__('datetime') hack;
    # the printed output is identical.
    from datetime import datetime
    print(f"System check at {datetime.now()}")
# Schedule tasks
# Registration via the third-party `schedule` library: times are local time
# and jobs only run inside this process when run_pending() is invoked.
schedule.every().day.at("02:00").do(job_daily_backup)
schedule.every().monday.at("09:00").do(job_weekly_report)
schedule.every().hour.do(job_hourly_check)
# Run scheduler
# Blocking poll loop: wakes every 30 seconds and runs any jobs that are due.
if __name__ == "__main__":
    print("Scheduler started")
    while True:
        schedule.run_pending()
        time.sleep(30)
Using System Cron (Linux/macOS)
Create a cron job for Python scripts:
# Edit crontab
crontab -e
# Add entries (minute hour day-of-month month day-of-week command)
# Run daily backup at 2 AM
# ">> file 2>&1" appends both stdout and stderr to the log file
0 2 * * * /usr/bin/python3 /path/to/backup_script.py >> /var/log/backup.log 2>&1
# Run hourly check every hour
0 * * * * /usr/bin/python3 /path/to/check_script.py
# Run every 15 minutes
*/15 * * * * /usr/bin/python3 /path/to/monitor_script.py
Using Windows Task Scheduler
# Create a scheduled task (run in PowerShell as Administrator)
$action = New-ScheduledTaskAction -Execute "python.exe" `
    -Argument "C:\scripts\myscript.py"
$trigger = New-ScheduledTaskTrigger -Daily -At "2:00 AM"
$settings = New-ScheduledTaskSettingsSet -RunOnlyIfNetworkAvailable
# NOTE(review): -Password in plain text embeds the credential in the script
# and shell history; prefer a service/gMSA account or an appropriate
# -LogonType for unattended runs -- confirm against your environment.
Register-ScheduledTask -Action $action -Trigger $trigger `
    -TaskName "DailyBackup" -Description "Run daily backup script" `
    -Settings $settings -User "username" -Password "password"
Building CLI Automation Tools
Create command-line tools with argparse:
#!/usr/bin/env python3
"""
automation_tools.py - Collection of automation utilities
Usage:
python automation_tools.py organize <directory>
python automation_tools.py monitor <url> [--every SECONDS]
python automation_tools.py backup <source> <destination>
"""
import argparse
import sys
from pathlib import Path
def cmd_organize(args):
    """Handle the `organize` sub-command: tidy up a directory by file type."""
    from automation import organize_downloads
    target = Path(args.directory)
    if not target.exists():
        print(f"Error: Directory {target} does not exist")
        sys.exit(1)
    organize_downloads(str(target))
    print("Organization complete")
def cmd_backup(args):
    """Handle the `backup` sub-command: copy a source tree into destination."""
    import shutil
    src = Path(args.source)
    dst = Path(args.destination)
    if not src.exists():
        print(f"Error: Source {src} does not exist")
        sys.exit(1)
    dst.mkdir(parents=True, exist_ok=True)
    # Copy the whole tree under <destination>/<source-name>, merging into
    # an existing copy if present.
    shutil.copytree(src, dst / src.name, dirs_exist_ok=True)
    print(f"Backup complete: {src} -> {dst}")
def cmd_monitor(args):
    """Handle the `monitor` sub-command: poll a URL until interrupted."""
    from automation import WebsiteMonitor
    import time
    watcher = WebsiteMonitor()
    selectors = {'content': args.selector} if args.selector else None
    print(f"Monitoring {args.url} every {args.every} seconds...")
    try:
        # Poll forever; Ctrl-C is the intended way to stop.
        while True:
            snapshot = watcher.check_for_changes(args.url, selectors or {}, "monitor")
            if snapshot:
                print(f"Current content: {snapshot}")
            time.sleep(int(args.every))
    except KeyboardInterrupt:
        print("\nMonitoring stopped")
def main():
    """Parse CLI arguments and dispatch to the selected sub-command."""
    parser = argparse.ArgumentParser(
        description="Automation tools for common tasks",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python automation_tools.py organize ./downloads
python automation_tools.py backup ./projects ./backups
python automation_tools.py monitor https://example.com --selector ".content"
"""
    )
    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # organize: tidy a directory by file type
    organize_parser = subparsers.add_parser('organize', help='Organize files by type')
    organize_parser.add_argument('directory', help='Directory to organize')

    # backup: copy a tree into a destination
    backup_parser = subparsers.add_parser('backup', help='Backup directory')
    backup_parser.add_argument('source', help='Source directory')
    backup_parser.add_argument('destination', help='Destination directory')

    # monitor: poll a URL for changes
    monitor_parser = subparsers.add_parser('monitor', help='Monitor URL for changes')
    monitor_parser.add_argument('url', help='URL to monitor')
    monitor_parser.add_argument('--every', default='60', help='Check interval in seconds')
    monitor_parser.add_argument('--selector', help='CSS selector for content')

    args = parser.parse_args()
    # Lazy dispatch table (lambdas defer name lookup until invocation),
    # replacing the original if/elif chain.
    dispatch = {
        'organize': lambda: cmd_organize(args),
        'backup': lambda: cmd_backup(args),
        'monitor': lambda: cmd_monitor(args),
    }
    action = dispatch.get(args.command)
    if action is None:
        parser.print_help()
        sys.exit(1)
    action()
# Script entry point: run the CLI only when executed directly.
if __name__ == "__main__":
    main()
Error Handling and Logging
Reliable automation needs proper logging:
import logging
import sys
from pathlib import Path
from datetime import datetime
def setup_logging(name='automation', level=logging.INFO):
    """Configure root logging for automation scripts.

    Creates a ./logs directory, logs to a per-day file and stdout, and
    returns a logger with the given *name*.
    """
    log_dir = Path('logs')
    log_dir.mkdir(exist_ok=True)
    log_file = log_dir / f"{name}_{datetime.now():%Y%m%d}.log"
    # Dual output: persistent file plus console.
    destinations = [
        logging.FileHandler(log_file),
        logging.StreamHandler(sys.stdout),
    ]
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=destinations,
    )
    return logging.getLogger(name)
def run_with_logging(func):
    """Decorator to run a function with logging and error handling.

    Logs start/finish of *func* and logs any exception with a traceback
    before re-raising it.

    Fix: apply functools.wraps so the wrapper preserves *func*'s name,
    docstring, and signature metadata (the original clobbered them).
    """
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        logger = setup_logging()
        try:
            logger.info(f"Starting {func.__name__}")
            result = func(*args, **kwargs)
            logger.info(f"Completed {func.__name__} successfully")
            return result
        except Exception as e:
            # exc_info=True records the full traceback in the log.
            logger.error(f"Error in {func.__name__}: {str(e)}", exc_info=True)
            raise
    return wrapper
Summary
Python automation turns tedious repetitive tasks into one-time scripts. Start with simple file operations, then move to web scraping, data processing, and scheduling. Build CLI tools for reusability. Add logging for reliability.
The scripts in this guide cover the most common automation scenarios. Modify them for your specific needs. The investment in automation pays back quickly when you reclaim hours of productive time.
For more automation examples, check our guides on Python scripting fundamentals and web scraping techniques.
Sources:
Discussion
Leave a comment
No comments yet
Be the first to start the conversation.