#!/usr/bin/env python
#
# Split a Durus file storage file into multiple chunks, making it easier to
# rsync.  The chunks can be concatenated to reconstruct the original file.
# This script tries to incrementally update the chunks, falling back to
# generating them all if the storage was packed.
#
# Example crontab entry:
#
#   0,15,30,45 * * * * bin/split_durus_fs.py myapp/data/app.durus db_backup

import sys
import os
import pickle

STATE_FILE = 'state.pkl'

def write_state(backup_dir, st, pos, i):
    """Atomically record backup progress in backup_dir.

    Persists (st_dev, st_ino, pos, i) -- the identity of the database file,
    the byte offset backed up so far, and the index of the last chunk --
    by writing a temporary file, fsyncing it, and renaming it over the
    previous state file.  On any failure the temporary file is removed and
    the old state file is left untouched, so the state is never corrupt.
    """
    filename = os.path.join(backup_dir, STATE_FILE + '.new')
    try:
        with open(filename, 'wb') as fp:
            pickle.dump((st.st_dev, st.st_ino, pos, i), fp)
            # flush Python's buffer first: fsync only makes data already
            # handed to the OS durable.
            fp.flush()
            os.fsync(fp.fileno())
    except Exception:
        # Best effort: drop the partial temp file; keep the old state.
        try:
            os.unlink(filename)
        except OSError:
            pass
    else:
        # rename() is atomic on POSIX, so readers see old or new state only.
        os.rename(filename, os.path.join(backup_dir, STATE_FILE))


def write_chunks(db_file, backup_dir, st, pos, i):
    """Copy db_file from offset pos into a new chunk file and record state.

    Creates chunk number i+1 (named NNNN.dat in backup_dir) containing every
    byte of db_file from pos to end-of-file, copied in 1 MB pieces, then
    persists the new position and chunk index via write_state().
    """
    i += 1
    chunk_path = os.path.join(backup_dir, '%.4d.dat' % i)
    with open(db_file, 'rb') as fp:
        fp.seek(pos)
        with open(chunk_path, 'wb') as outfp:
            while 1:
                data = fp.read(1000000)
                if not data:
                    break
                outfp.write(data)
                pos += len(data)
            # Make the chunk durable BEFORE write_state records the new
            # position; otherwise a crash could leave state claiming data
            # that never reached the disk.
            outfp.flush()
            os.fsync(outfp.fileno())
    write_state(backup_dir, st, pos, i)


def do_full(db_file, backup_dir, st):
    """Discard all existing chunks and rebuild them from offset zero."""
    stale = [fn for fn in os.listdir(backup_dir) if fn.endswith('.dat')]
    for fn in stale:
        os.unlink(os.path.join(backup_dir, fn))
    write_chunks(db_file, backup_dir, st, 0, 0)


def do_incremental(db_file, backup_dir, st, pos, i):
    """Write one new chunk holding only the bytes appended since pos."""
    write_chunks(db_file, backup_dir, st, pos, i)


def check_durus_filename(pid_file, db_file):
    """Ensure the process named by pid_file has db_file open.

    Reads the pid from pid_file and scans /proc/<pid>/fd (Linux only) for a
    descriptor referring to db_file.  Returns None if found; raises
    RuntimeError if the running server is not using the file we are about
    to back up.
    """
    with open(pid_file) as fp:
        pid = int(fp.read().strip())
    base = '/proc/%d/fd' % pid
    for name in os.listdir(base):
        fd_path = os.path.join(base, name)
        try:
            if os.path.samefile(fd_path, db_file):
                return  # okay
        except OSError:
            # The fd can be closed between listdir() and samefile(); a
            # vanished entry is not a mismatch, just skip it.
            continue
    raise RuntimeError('the open durus file does not match the path specified')

def main():
    """Back up the Durus file named on the command line into chunk files.

    Usage: split_durus_fs.py <db file> <backup dir> [<pid file>]

    Does an incremental backup when the saved state shows the same inode
    and a size that has only grown (Durus keeps the pre-pack file around,
    so a packed storage gets a new inode); otherwise regenerates all
    chunks from scratch.
    """
    try:
        db_file = sys.argv[1]
        backup_dir = sys.argv[2]
    except IndexError:
        raise SystemExit('Usage: %s <db file> <backup dir> [<pid file>]' %
                         os.path.basename(sys.argv[0]))
    if len(sys.argv) > 3:
        pid_file = sys.argv[3]
        check_durus_filename(pid_file, db_file)
    st = os.stat(db_file)
    try:
        with open(os.path.join(backup_dir, STATE_FILE), 'rb') as fp:
            dev, inode, pos, i = pickle.load(fp)
    except (IOError, OSError, EOFError, ValueError, pickle.UnpicklingError):
        # No state file, or a corrupt/truncated one: start from scratch
        # rather than crashing and leaving the backup stale.
        do_full(db_file, backup_dir, st)
        return
    if (st.st_dev == dev and
        st.st_ino == inode and
        st.st_size >= pos):
        # We are reasonably sure that the file has not been packed.  Durus
        # keeps the original file around after packing so the inodes cannot
        # be the same.  Do an incremental backup.
        if st.st_size != pos:
            do_incremental(db_file, backup_dir, st, pos, i)
        # equal size: nothing new to back up
    else:
        do_full(db_file, backup_dir, st)


if __name__ == '__main__':
    main()
