I wanted to use a proper backup solution to replace my rsync script. I decided to use BorgBackup as it seemed to fit the bill. It is a repository-based system that has very strong deduplication algorithms. Essentially, you create a backup repository in a particular path and then back up folders and files to the repository.

I liked the idea of using BorgBackup. However, I wanted to automate the process. My previous rsync backup script worked with removable media consisting of various USB hard disks and regular hard disks that would be plugged into a USB dock. It would detect which drive was inserted and sync the files to it. I wanted to make sure I could do the same thing with BorgBackup.

Initially, I wrote a shell script to handle it. There was a lot of repetition in the script, so I decided to rewrite it in Python.

Here is the resulting script:

#!/usr/bin/env python3
#-*- coding:utf-8 -*-

"""
A script to automate a borg backup.


Copyright (c) 2018 Troy Williams

License: The MIT License (http://www.opensource.org/licenses/mit-license.php)
"""

# Module metadata constants (authorship and dating information for the script).
__uuid__ = ''  # left empty; nothing in the visible script reads it
__author__ = 'Troy Williams'
__email__ = 'troy.williams@bluebill.net'
__copyright__ = 'Copyright (c) 2018, Troy Williams'
__date__ = '2018-10-01'
__maintainer__ = 'Troy Williams'

import sys
import os
import subprocess
import platform
import datetime
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter

# ---------------
# This stuff with the drives, backup folders could be moved to a configuration
# file...

# Mount point under which the removable drives show up.
base_media_folder = "/media/troy"

# Candidate backup drives, listed in the order they are stored here.
# Each entry is the drive's folder joined onto the base media folder.
backup_media = [
    os.path.join(base_media_folder, drive_name)
    for drive_name in (
        "backup1",
        "backup2",
        "backup3",
        "c5817615-37c8-4765-bbc3-955ecd426db1/troy",
    )
]

# What gets backed up and where it goes. Each tuple is
# (path in the file system, repository folder name on the media).
repositories = [
    ('/home/troy', 'home_backup'),
    ('/home/troy/music', 'music'),
    ('/home/troy/pictures', 'pictures'),
    ('/home/troy/videos', 'videos'),
]


def find_first_active_drive(paths):
    """
    Return the first entry in paths that is an existing directory.

    The entries are tested in iteration order with os.path.isdir. If none
    of them is a directory (or paths is empty), None is returned.
    """
    mounted = (candidate for candidate in paths if os.path.isdir(candidate))
    return next(mounted, None)


def borg_create_command(backup_folder,
                        repository_folder,
                        backup_excludes=None):
    """
    Build the argument list for a 'borg create' call.

    The archive is named '<hostname>-<YYYY-MM-DD HH:MM>', where the host
    name comes from platform.node() and the time stamp is the current
    local time.

    Parameters:
        backup_folder: the path whose contents are to be backed up.
        repository_folder: the path of the borg repository that receives
            the archive.
        backup_excludes: optional iterable of exclude patterns. Each
            non-blank entry is passed to borg as '--exclude <pattern>'.

    Returns:
        A list of strings suitable for subprocess.run.
    """

    # The command is executed as an argument list (no shell), so the archive
    # name must not be wrapped in quote characters: they would become part of
    # the archive name and it would no longer start with the '<hostname>-'
    # prefix that the prune command selects archives by.
    archive = '{}::{}-{:%Y-%m-%d %H:%M}'.format(repository_folder,
                                                platform.node(),
                                                datetime.datetime.now())

    command = ['borg',
               'create',
               '--verbose',
               '--progress',
               '--stats',
               '--compression', 'auto,lzma',
               archive,
               backup_folder]

    if backup_excludes:
        for exclude in backup_excludes:
            pattern = '{}'.format(exclude)

            # a blank entry (e.g. an empty line in an excludes file) is not a
            # meaningful pattern, so it is not handed to borg
            if not pattern.strip():
                continue

            command.extend(['--exclude', pattern])

    return command


def borg_prune_command(repository_folder):
    """
    Build the argument list for a 'borg prune' call on repository_folder.

    Archives are selected with '--prefix <hostname>-' (host name taken from
    platform.node()) and the retention flags passed are --keep-hourly=6,
    --keep-daily=7, --keep-weekly=4 and --keep-monthly=6.
    """
    archive_prefix = platform.node() + '-'

    # (period, number of archives to keep), in the order they are passed
    retention = (('hourly', 6),
                 ('daily', 7),
                 ('weekly', 4),
                 ('monthly', 6))

    keep_flags = ['--keep-{}={}'.format(period, count)
                  for period, count in retention]

    selection = ['borg', 'prune', '-v', repository_folder,
                 '--prefix', archive_prefix]

    return selection + keep_flags


def borg_list_command(repository_folder):
    """
    Build the argument list for a 'borg list' call on repository_folder.
    """
    command = ['borg', 'list']
    command.append(repository_folder)
    return command


def get_parser():
    """
    Build the command line parser for this script.

    Returns:
        An ArgumentParser that understands two optional flags, both False
        unless given:
          --init    create folders and initialize repositories on the
                    target media
          --verify  verify and validate the repositories
    """

    parser = ArgumentParser(description=__doc__,
                            formatter_class=ArgumentDefaultsHelpFormatter)

    # The help text has to be a single string. The two literals are joined by
    # adjacency; separating them with a comma would create a tuple, which
    # makes the help formatter raise a TypeError when --help is requested.
    parser.add_argument('--init',
                        dest='init',
                        action='store_true',
                        help=('Create folders and initialize'
                              ' repositories on the target media.'))

    parser.add_argument('--verify',
                        dest='verify',
                        action='store_true',
                        help='Verify and Validate the repositories.')

    return parser


def _init_repositories(target_path):
    """
    Create and initialize every repository folder missing on target_path.

    Folders that already exist are left untouched. The script exits with
    borg's return code as soon as a 'borg init' call fails.
    https://borgbackup.readthedocs.io/en/stable/usage/init.html
    """
    print('Initializing target: {}'.format(target_path))

    for _, folder in repositories:
        repo = os.path.join(target_path, folder)

        if os.path.exists(repo):
            continue

        os.makedirs(repo)

        result = subprocess.run(['borg',
                                 'init',
                                 '--encryption=none',
                                 repo])

        if result.returncode != 0:
            sys.exit(result.returncode)

    print('{} has been initialized...'.format(target_path))


def _verify_repositories(target_path):
    """
    Run 'borg check' on every repository folder present on target_path.

    Repository folders that do not exist are reported and skipped; nothing
    is created. The script exits with borg's return code as soon as a check
    fails.
    https://borgbackup.readthedocs.io/en/stable/usage/check.html#
    """
    for _, folder in repositories:
        repo = os.path.join(target_path, folder)
        print('Verifying: {}'.format(repo))

        # only an existing repository can be checked
        if not os.path.isdir(repo):
            print("Repo Folder Doesn't exist. Skipping...")
            print()
            continue

        result = subprocess.run(['borg',
                                 'check',
                                 repo])

        if result.returncode != 0:
            sys.exit(result.returncode)

    print('Verification Complete...')


def _read_excludes(backup):
    """
    Return the exclude patterns that belong to the backup folder.

    The patterns are read from '<basename of backup>.borg.excludes', which
    is opened relative to the current working directory, one pattern per
    line. Blank lines are dropped. None is returned when the file cannot be
    opened.
    """
    excludes_file = '{}.borg.excludes'.format(os.path.basename(backup))

    try:
        with open(excludes_file) as f:
            stripped = (line.strip() for line in f)
            return [pattern for pattern in stripped if pattern]

    except IOError:
        # no (readable) excludes file simply means nothing is excluded
        return None


def _backup_repositories(target_path):
    """
    Back up every configured folder to its repository on target_path.

    For each repository that exists, an archive is created, the repository
    is pruned and its archives are listed. A failing 'borg create' ends the
    script with borg's return code; failing prune or list calls are only
    reported, because the archive itself has already been written.
    """
    for backup, folder in repositories:
        repo = os.path.join(target_path, folder)
        excludes = _read_excludes(backup)

        print()
        print('Backing up: {}'.format(backup))
        print('Repo:       {}'.format(repo))

        if not os.path.isdir(repo):
            print("Repo Folder Doesn't exist. Skipping...")
            print()
            continue

        result = subprocess.run(borg_create_command(backup,
                                                    repo,
                                                    excludes))

        if result.returncode != 0:
            sys.exit(result.returncode)

        result = subprocess.run(borg_prune_command(repo))
        if result.returncode != 0:
            print('Warning: borg prune returned {}'.format(result.returncode))

        result = subprocess.run(borg_list_command(repo))
        if result.returncode != 0:
            print('Warning: borg list returned {}'.format(result.returncode))


def main():
    """
    Parse the command line and run the requested action.

    The first available drive from backup_media is used as the target. If
    none is found the script exits with status 1. With --init the
    repositories are created and initialized, with --verify they are
    checked; in both cases the script exits with status 0 afterwards.
    Without either flag the configured folders are backed up.

    Returns:
        0 when the backup run completes.
    """

    # get the command line arguments
    parser = get_parser()
    args = parser.parse_args()

    target_path = find_first_active_drive(backup_media)
    if not target_path:
        print('Could not find a backup drive.'
              ' Please mount a drive and try again.')
        sys.exit(1)

    # we have a path to a repository
    print('Backing up to: {}'.format(target_path))
    print()

    if args.init:
        _init_repositories(target_path)
        sys.exit(0)

    if args.verify:
        _verify_repositories(target_path)
        sys.exit(0)

    _backup_repositories(target_path)

    return 0  # success


if __name__ == '__main__':
    # hand the value returned by main() to the caller as the exit status
    sys.exit(main())

Usage

In order to use the backup feature, you need to create and initialize a new repository on the target media. Do that by issuing the following command:

$ python3 backup_borg.py --init

To perform a backup on media that has already been initialized, issue the following command:

$ python3 backup_borg.py

Excludes

If there are paths that you wish to exclude from the backup process, create a file named after the basename of the folder that you are backing up, and place it in the directory from which you run the script. For example, for '/home/troy' (a path set in the repositories variable), the file would be called "troy.borg.excludes" and it would contain the folders that you want to exclude, one folder/file filter per line:

/home/troy/.esd_auth
/home/troy/.mozilla/firefox/*/Cache
/home/troy/.mozilla/firefox/*/minidumps
/home/troy/.mozilla/firefox/*/.parentlock
/home/troy/.mozilla/firefox/*/urlclassifier3.sqlite
/home/troy/.mozilla/firefox/*/blocklist.xml
/home/troy/.mozilla/firefox/*/extensions.sqlite
/home/troy/.mozilla/firefox/*/extensions.sqlite-journal
/home/troy/.mozilla/firefox/*/extensions.rdf
/home/troy/.mozilla/firefox/*/extensions.ini
/home/troy/.mozilla/firefox/*/extensions.cache
/home/troy/.mozilla/firefox/*/XUL.mfasl
/home/troy/.mozilla/firefox/*/XPC.mfasl
/home/troy/.mozilla/firefox/*/xpti.dat
/home/troy/.mozilla/firefox/*/compreg.dat
/home/troy/.config/google-chrome/Default/Local Storage
/home/troy/.config/google-chrome/Default/Session Storage
/home/troy/.config/google-chrome/Default/Application Cache
/home/troy/.config/google-chrome/Default/History Index *