Python Task Automation: Complete Guide to Scripting Daily Tasks

Learn how to automate repetitive tasks with Python. Covers file management, web scraping, data processing, scheduling, and building automation tools that save hours of manual work.

Every developer has tasks they repeat dozens of times daily: renaming files, moving data between folders, downloading reports, sending emails, updating spreadsheets. These tasks take minutes each but add up to hours every week. Python automation eliminates this drudgery.

This guide shows you how to build practical automation scripts. You will learn to automate file operations, scrape websites, process data, schedule tasks, and combine everything into tools you can use every day.

Why Automate with Python

Manual tasks have hidden costs. A task taking 2 minutes, done 20 times daily, consumes 40 minutes a day — more than three hours every week. Over a year, that is roughly 170 hours, more than four full work weeks. Python automation turns these minutes into seconds.

The benefits compound. An automation script runs the same way every time, eliminating human error. You can run scripts overnight while you sleep. You can share scripts with colleagues, spreading the time savings.

File Automation Scripts

Batch File Operations

Organize messy directories automatically:

import os
import shutil
from pathlib import Path
from datetime import datetime

def organize_downloads(download_dir, rules=None):
    """Sort loose files in *download_dir* into category/date subfolders.

    Files are matched by extension against *rules* (a mapping of
    category name -> list of extensions); anything unmatched goes into
    an 'others' bucket. Name collisions are resolved by appending a
    numeric counter, so nothing is ever overwritten.
    """
    if rules is None:
        rules = {
            'images': ['.jpg', '.jpeg', '.png', '.gif', '.webp'],
            'documents': ['.pdf', '.doc', '.docx', '.txt', '.xlsx'],
            'archives': ['.zip', '.tar', '.gz', '.7z'],
            'scripts': ['.py', '.js', '.sh', '.sql'],
        }

    base = Path(download_dir)
    date_label = datetime.now().strftime('%Y-%m-%d')

    for entry in base.iterdir():
        # Skip subdirectories and hidden files (dotfiles).
        if not entry.is_file() or entry.name.startswith('.'):
            continue

        suffix = entry.suffix.lower()

        # First category whose extension list contains this suffix.
        bucket = next(
            (cat for cat, exts in rules.items() if suffix in exts),
            'others',
        )

        target_dir = base / bucket / date_label
        target_dir.mkdir(parents=True, exist_ok=True)

        # Pick a collision-free destination name.
        target = target_dir / entry.name
        serial = 1
        while target.exists():
            target = target_dir / f"{entry.stem}_{serial}{entry.suffix}"
            serial += 1

        shutil.move(str(entry), str(target))
        print(f"Moved: {entry.name} -> {bucket}/")

# Usage: tidy the current user's Downloads folder in place.
organize_downloads(str(Path.home() / "Downloads"))

File Search and Processing

Find and modify files across directories:

import os
from pathlib import Path
import re

def find_and_replace(root_dir, pattern, replacement, file_types=None):
    """Recursively find and replace regex *pattern* in text files.

    Args:
        root_dir: Directory walked recursively.
        pattern: Regular expression to search for.
        replacement: Replacement text (may use backrefs like ``\\1``).
        file_types: Extensions to touch (default: common text/code types).

    Returns:
        List of ``(path, replacement_count)`` tuples for modified files.
    """
    if file_types is None:
        file_types = ['.py', '.txt', '.md', '.json', '.yaml', '.yml']

    # Compare extensions case-insensitively: previously 'FILE.TXT' was
    # silently skipped because '.TXT' != '.txt'. A set also makes the
    # membership test O(1).
    allowed = {ext.lower() for ext in file_types}

    root = Path(root_dir)
    changes = []

    for file in root.rglob('*'):
        if file.is_file() and file.suffix.lower() in allowed:
            try:
                content = file.read_text(encoding='utf-8')
                new_content, count = re.subn(pattern, replacement, content)

                if count > 0:
                    file.write_text(new_content, encoding='utf-8')
                    changes.append((str(file), count))
                    print(f"Updated: {file.name} ({count} replacements)")
            except (UnicodeDecodeError, PermissionError):
                # Skip binary or unreadable files silently.
                continue

    return changes

Web Scraping Automation

Automate data collection from websites:

import requests
from bs4 import BeautifulSoup
from datetime import datetime
import json
import time

class WebsiteMonitor:
    """Monitor websites for changes and send notifications."""

    def __init__(self, notification_webhook=None):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (compatible; AutomationBot/1.0)'
        })
        self.notification_webhook = notification_webhook
        # Last extracted snapshot per monitor name, used for diffing.
        self.previous_content = {}

    def fetch_page(self, url):
        """Return the page body for *url*, or None on any request error."""
        try:
            resp = self.session.get(url, timeout=10)
            resp.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching {url}: {e}")
            return None
        return resp.text

    def extract_content(self, html, selectors):
        """Map each selector name to its stripped text (None if absent)."""
        soup = BeautifulSoup(html, 'html.parser')
        extracted = {}
        for label, css in selectors.items():
            node = soup.select_one(css)
            extracted[label] = node.text.strip() if node else None
        return extracted

    def check_for_changes(self, url, selectors, name):
        """Fetch *url*, diff against the previous snapshot, notify on change."""
        html = self.fetch_page(url)
        if html is None:
            return None

        current = self.extract_content(html, selectors)

        previous = self.previous_content.get(name)
        if previous is not None and current != previous:
            self.notify(name, current, previous)

        self.previous_content[name] = current
        return current

    def notify(self, name, new_content, old_content):
        """Report which fields changed; POST to the webhook when configured."""
        changes = {
            key: {'from': old_content.get(key), 'to': value}
            for key, value in new_content.items()
            if value != old_content.get(key)
        }

        if changes and self.notification_webhook:
            self.session.post(self.notification_webhook, json={
                'name': name,
                'changes': changes,
                'timestamp': datetime.now().isoformat(),
            })

        print(f"Changes detected in {name}: {list(changes.keys())}")

API-Based Data Collection

Automate data collection from APIs:

import json
import time
from datetime import datetime
from pathlib import Path

import requests

class APICollector:
    """Collect and store data from REST APIs."""

    def __init__(self, base_url, output_dir='data'):
        self.base_url = base_url
        self.session = requests.Session()
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(exist_ok=True)

    def fetch_all_pages(self, endpoint, params=None, max_pages=None):
        """Fetch all pages from a paginated API.

        Args:
            endpoint: Path appended to ``base_url``.
            params: Extra query parameters (the caller's dict is not mutated).
            max_pages: Optional hard cap on the number of pages fetched.

        Returns:
            List of accumulated records.
        """
        all_data = []
        # Copy so we never mutate the dict the caller passed in.
        query = dict(params or {})
        page = 1

        while True:
            if max_pages and page > max_pages:
                break

            url = f"{self.base_url}/{endpoint}"
            query['page'] = page

            try:
                response = self.session.get(url, params=query, timeout=30)
                response.raise_for_status()
                data = response.json()

                if 'data' in data:
                    page_items = data['data']
                    all_data.extend(page_items)
                else:
                    page_items = None
                    all_data.append(data)

                # Stop when the API reports no more pages. If there is no
                # pagination metadata at all, stop once a 'data' page comes
                # back empty or the response has no 'data' list — previously
                # this looped forever when max_pages was None.
                if isinstance(data, dict) and 'pagination' in data:
                    if not data['pagination'].get('has_more', False):
                        break
                elif not page_items:
                    break

                page += 1
                time.sleep(0.5)  # Rate limiting

            except requests.RequestException as e:
                print(f"Error on page {page}: {e}")
                break

        return all_data

    def save_to_json(self, data, filename):
        """Save collected data to a timestamped JSON file.

        Returns:
            Path of the written file, named ``<filename>_<timestamp>.json``.
        """
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        # Use the caller-supplied filename as the prefix (this previously
        # wrote a hard-coded "(unknown)" prefix and ignored the parameter).
        filepath = self.output_dir / f"{filename}_{timestamp}.json"

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"Saved {len(data)} records to {filepath}")
        return filepath

Data Processing Automation

Automate Excel and CSV processing:

import pandas as pd
from pathlib import Path
from datetime import datetime
import glob

class DataProcessor:
    """Automate data processing and transformation."""

    def __init__(self, input_dir, output_dir):
        self.input_dir = Path(input_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(exist_ok=True)

    def process_csv_files(self, processing_func, output_prefix=''):
        """Run *processing_func* over every CSV in the input directory.

        Any result that is a DataFrame is written to the output directory
        as ``<prefix>_<original stem>.csv``. Returns the written paths.
        """
        written = []

        for source in self.input_dir.glob('*.csv'):
            print(f"Processing: {source.name}")

            try:
                frame = pd.read_csv(source)
                outcome = processing_func(frame)

                if isinstance(outcome, pd.DataFrame):
                    target = self.output_dir / f"{output_prefix}_{source.stem}.csv"
                    outcome.to_csv(target, index=False)
                    written.append(target)
                    print(f"Saved: {target.name}")

            except Exception as e:
                # Best-effort batch: report the failure, move on to the
                # next file rather than aborting the whole run.
                print(f"Error processing {source.name}: {e}")

        return written

    def merge_and_summarize(self, glob_pattern, group_by_col, aggregations):
        """Concatenate all files matching *glob_pattern* and aggregate.

        Returns the grouped summary DataFrame, or None when no file matched.
        """
        matched = glob.glob(str(self.input_dir / glob_pattern))

        if not matched:
            print("No files found matching pattern")
            return None

        combined = pd.concat(
            (pd.read_csv(path) for path in matched), ignore_index=True
        )

        summary = combined.groupby(group_by_col).agg(aggregations).reset_index()

        # Stamp the summary with provenance metadata.
        summary['processed_at'] = datetime.now().isoformat()
        summary['source_files'] = len(matched)

        return summary

# Example processing function
def clean_sales_data(df):
    """Clean and transform sales data."""
    df = df.copy()

    # Standardize column names
    df.columns = [c.lower().strip().replace(' ', '_') for c in df.columns]

    # Convert dates
    date_cols = [c for c in df.columns if 'date' in c.lower()]
    for col in date_cols:
        df[col] = pd.to_datetime(df[col], errors='coerce')

    # Fill missing values
    numeric_cols = df.select_dtypes(include=['number']).columns
    df[numeric_cols] = df[numeric_cols].fillna(0)

    # Add computed columns
    df['year_month'] = df['date'].dt.to_period('M')
    df['quarter'] = df['date'].dt.quarter

    return df

Task Scheduling

Run automation scripts on a schedule:

Using Schedule Library

import schedule
import time
import os

def job_daily_backup():
    """Daily backup task: mirror /source into /destination/ with rsync."""
    import subprocess  # local import keeps this snippet self-contained

    print("Starting backup...")
    # subprocess.run with an argument list avoids shell parsing entirely
    # and exposes the exit code — os.system silently hid rsync failures.
    result = subprocess.run(["rsync", "-av", "/source", "/destination/"])
    if result.returncode != 0:
        print(f"Backup failed with exit code {result.returncode}")
    else:
        print("Backup complete")

def job_weekly_report():
    """Generate weekly report.

    Placeholder stub: insert the actual report generation/delivery
    logic between the two status messages below.
    """
    print("Generating weekly report...")
    # Report generation code
    print("Report sent")

def job_hourly_check():
    """Hourly system check: print a timestamped heartbeat."""
    # Plain local import instead of the opaque __import__('datetime') hack.
    from datetime import datetime

    print(f"System check at {datetime.now()}")

# Schedule tasks — times are interpreted in local time, 24-hour clock.
schedule.every().day.at("02:00").do(job_daily_backup)
schedule.every().monday.at("09:00").do(job_weekly_report)
schedule.every().hour.do(job_hourly_check)

# Run scheduler: poll every 30 s; run_pending() fires only jobs whose
# scheduled time has arrived, so the sleep just bounds polling overhead.
if __name__ == "__main__":
    print("Scheduler started")
    while True:
        schedule.run_pending()
        time.sleep(30)

Using System Cron (Linux/macOS)

Create a cron job for Python scripts:

# Edit the current user's crontab
crontab -e

# Add entries — five schedule fields, then the command:
# (minute hour day-of-month month day-of-week command)
# Run daily backup at 2 AM; append stdout and stderr to a log file
0 2 * * * /usr/bin/python3 /path/to/backup_script.py >> /var/log/backup.log 2>&1

# Run the check script at the top of every hour
0 * * * * /usr/bin/python3 /path/to/check_script.py

# Run every 15 minutes
*/15 * * * * /usr/bin/python3 /path/to/monitor_script.py

Using Windows Task Scheduler

# Create a scheduled task (run in PowerShell as Administrator)
$action = New-ScheduledTaskAction -Execute "python.exe" `
    -Argument "C:\scripts\myscript.py"
$trigger = New-ScheduledTaskTrigger -Daily -At "2:00 AM"
$settings = New-ScheduledTaskSettingsSet -RunOnlyIfNetworkAvailable

# NOTE(review): passing -Password as a plain-text literal leaves the
# credential in shell history/scripts; prefer running the task as a
# service account or prompting with Get-Credential — confirm policy.
Register-ScheduledTask -Action $action -Trigger $trigger `
    -TaskName "DailyBackup" -Description "Run daily backup script" `
    -Settings $settings -User "username" -Password "password"

Building CLI Automation Tools

Create command-line tools with argparse:

#!/usr/bin/env python3
"""
automation_tools.py - Collection of automation utilities

Usage:
    python automation_tools.py organize <directory>
    python automation_tools.py monitor <url> [--every SECONDS]
    python automation_tools.py backup <source> <destination>
"""

import argparse
import sys
from pathlib import Path

def cmd_organize(args):
    """Organize files in a directory."""
    from automation import organize_downloads

    target = Path(args.directory)
    if not target.exists():
        # Fail fast with a clear message; non-zero exit for scripts.
        print(f"Error: Directory {target} does not exist")
        sys.exit(1)

    organize_downloads(str(target))
    print("Organization complete")

def cmd_backup(args):
    """Backup source to destination."""
    import shutil

    source = Path(args.source)
    dest = Path(args.destination)

    if not source.exists():
        # Fail fast with a clear message; non-zero exit for scripts.
        print(f"Error: Source {source} does not exist")
        sys.exit(1)

    dest.mkdir(parents=True, exist_ok=True)
    # Copy the whole tree under a folder named after the source;
    # dirs_exist_ok lets repeated backups refresh in place.
    shutil.copytree(source, dest / source.name, dirs_exist_ok=True)
    print(f"Backup complete: {source} -> {dest}")

def cmd_monitor(args):
    """Monitor a URL for changes."""
    from automation import WebsiteMonitor
    import time

    watcher = WebsiteMonitor()
    # Wrap a single selector into the mapping the monitor expects.
    selectors = {'content': args.selector} if args.selector else {}

    print(f"Monitoring {args.url} every {args.every} seconds...")
    try:
        while True:
            snapshot = watcher.check_for_changes(args.url, selectors, "monitor")
            if snapshot:
                print(f"Current content: {snapshot}")
            time.sleep(int(args.every))
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop; exit cleanly.
        print("\nMonitoring stopped")

def main(argv=None):
    """CLI entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.
            Accepting it as a parameter makes the CLI testable without
            touching sys.argv (backward compatible: callers of ``main()``
            are unaffected).
    """
    parser = argparse.ArgumentParser(
        description="Automation tools for common tasks",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python automation_tools.py organize ./downloads
    python automation_tools.py backup ./projects ./backups
    python automation_tools.py monitor https://example.com --selector ".content"
        """
    )

    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # Organize command
    organize_parser = subparsers.add_parser('organize', help='Organize files by type')
    organize_parser.add_argument('directory', help='Directory to organize')

    # Backup command
    backup_parser = subparsers.add_parser('backup', help='Backup directory')
    backup_parser.add_argument('source', help='Source directory')
    backup_parser.add_argument('destination', help='Destination directory')

    # Monitor command
    monitor_parser = subparsers.add_parser('monitor', help='Monitor URL for changes')
    monitor_parser.add_argument('url', help='URL to monitor')
    # type=int makes argparse reject non-numeric intervals with a proper
    # usage error, instead of a later int() crash inside the monitor loop.
    monitor_parser.add_argument('--every', type=int, default=60,
                                help='Check interval in seconds (default: 60)')
    monitor_parser.add_argument('--selector', help='CSS selector for content')

    args = parser.parse_args(argv)

    # Dispatch to the selected subcommand; no command -> help + error exit.
    if args.command == 'organize':
        cmd_organize(args)
    elif args.command == 'backup':
        cmd_backup(args)
    elif args.command == 'monitor':
        cmd_monitor(args)
    else:
        parser.print_help()
        sys.exit(1)

if __name__ == "__main__":
    main()

Error Handling and Logging

Reliable automation needs proper logging:

import logging
import sys
from pathlib import Path
from datetime import datetime

def setup_logging(name='automation', level=logging.INFO):
    """Configure logging for automation scripts.

    Attaches a dated file handler (``logs/<name>_YYYYMMDD.log``) and a
    stdout handler to the named logger. Idempotent: repeated calls for
    the same name do not stack duplicate handlers.

    Note: the original used logging.basicConfig, which is a no-op once
    the root logger is configured — so a second call with a different
    *name* never got its own log file. Configuring the named logger
    directly fixes that.

    Args:
        name: Logger (and log-file) name.
        level: Logging level for the logger.

    Returns:
        The configured ``logging.Logger``.
    """
    log_dir = Path('logs')
    log_dir.mkdir(exist_ok=True)

    log_file = log_dir / f"{name}_{datetime.now():%Y%m%d}.log"

    logger = logging.getLogger(name)
    logger.setLevel(level)

    if not logger.handlers:  # avoid duplicate handlers on repeat calls
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        for handler in (logging.FileHandler(log_file),
                        logging.StreamHandler(sys.stdout)):
            handler.setFormatter(formatter)
            logger.addHandler(handler)

    return logger

def run_with_logging(func):
    """Decorator: log start/finish of *func*, re-raise on error.

    The wrapped function keeps its __name__/__doc__ via functools.wraps
    (previously the wrapper clobbered them, which breaks debugging and
    any introspection-based tooling).
    """
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        logger = setup_logging()
        try:
            logger.info(f"Starting {func.__name__}")
            result = func(*args, **kwargs)
            logger.info(f"Completed {func.__name__} successfully")
            return result
        except Exception as e:
            # Log the full traceback, then propagate so schedulers and
            # callers still see the failure.
            logger.error(f"Error in {func.__name__}: {str(e)}", exc_info=True)
            raise

    return wrapper

Summary

Python automation turns tedious repetitive tasks into one-time scripts. Start with simple file operations, then move to web scraping, data processing, and scheduling. Build CLI tools for reusability. Add logging for reliability.

The scripts in this guide cover the most common automation scenarios. Modify them for your specific needs. The investment in automation pays back quickly when you reclaim hours of productive time.

For more automation examples, check our guides on Python scripting fundamentals and web scraping techniques.


Sources:

Spread The Article

Share this guide

Send this article to your network or keep a copy of the direct link.

X Facebook LinkedIn Reddit Telegram

Discussion

Leave a comment

No comments yet

Be the first to start the conversation.