10.5: Compare versions of files on Time Machine backup

Mar 02, '09 07:30:04AM

Contributed by: ptone

There are (or have been) a number of version control systems en vogue over time -- CVS, SVN, Git, etc. I try to keep up with them and use them where possible, but I don't put everything I do in version control. Since I am on a Mac running Leopard, I do have (and use) Time Machine. So I wanted to see if I could use my Time Machine backups to do some quick comparisons between source files.

The result is the following quick and dirty python script. Note that you can find the original version of this script in this post on my blog; you may want to check there for an updated version. Here's the code:

#!/usr/bin/env python
# encoding: utf-8
"""
Created by Preston Holmes on 2009-02-23.
Copyright (c) 2009 __MyCompanyName__. All rights reserved.
"""

import sys
import os
import getopt
import difflib
import time
import pdb
from subprocess import Popen, PIPE

# Optional override: set time_machine_path to the full path of this machine's
# folder on the backup drive, e.g. '/Volumes/TM_Drive/Backups.backupdb/Joes-Mac/'
# If it is left as None the script uses the first Time Machine drive it finds
# and the first host folder on that drive, which should work for most cases.
# Will not work with network or disk image based Time Machine backups.

time_machine_path = None

verbose = False
cmd = 'tell application "Finder" to name of (path to startup disk)'
# The AppleScript itself contains double quotes, so it is wrapped in single
# quotes for the shell.  universal_newlines=True makes the output text rather
# than bytes on Python 3, so it can be joined into paths; the trailing newline
# printed by osascript is stripped.  If osascript is unavailable the shell
# writes its error to the captured stderr and boot_volume ends up empty.
boot_volume = Popen("osascript -e '%s'" % cmd, shell=True, stdout=PIPE,
                    stderr=PIPE,
                    universal_newlines=True).communicate()[0].rstrip('\n')



# Usage text; main() raises it inside a Usage exception when help is requested
# or when no file arguments are given.
help_message = '''
Call this script with the path to one or more text based files as arguments
'''
# Opening part of the generated HTML report: doctype, <head> with the CSS used
# to colour the diff tables (added / changed / deleted cells), and the opening
# <body> tag.  main() starts the report with this string.
header = '''

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html>

<head>
    <meta http-equiv="Content-Type"
          content="text/html; charset=ISO-8859-1" />
    <title></title>
    <style type="text/css">
        table.diff {font-family:Courier; border:medium;}
        .diff_header {background-color:#e0e0e0}
        td.diff_header {text-align:right}
        .diff_next {background-color:#c0c0c0}
        .diff_add {background-color:#aaffaa}
        .diff_chg {background-color:#ffff77}
        .diff_sub {background-color:#ffaaaa}
    </style>
</head>

<body>
'''
# Closing part of the generated HTML report: a legend table explaining the
# colours and the (f)/(n)/(t) navigation links, then the closing </body> and
# </html> tags.  main() appends this string after all diff tables.
footer = '''
    <table class="diff" summary="Legends">
        <tr> <th colspan="2"> Legends </th> </tr>
        <tr> <td> <table border="" summary="Colors">
                      <tr><th> Colors </th> </tr>
                      <tr><td class="diff_add">&nbsp;Added&nbsp;</td></tr>
                      <tr><td class="diff_chg">Changed</td> </tr>
                      <tr><td class="diff_sub">Deleted</td> </tr>
                  </table></td>
             <td> <table border="" summary="Links">
                      <tr><th colspan="2"> Links </th> </tr>
                      <tr><td>(f)irst change</td> </tr>
                      <tr><td>(n)ext change</td> </tr>
                      <tr><td>(t)op</td> </tr>
                  </table></td> </tr>
    </table>
</body>

</html>
'''
class Usage(Exception):
    """Raised to abort the run and report a command-line usage message.

    The message is kept on ``msg`` (the attribute ``main`` prints) and is also
    handed to ``Exception`` so that ``str(err)`` and tracebacks are not blank.
    """

    def __init__(self, msg):
        super(Usage, self).__init__(msg)
        self.msg = msg

def find_versions(path):
    """Return the distinct versions of ``path`` found on the Time Machine backup.

    Every entry of the module-level ``time_machine_path`` folder is treated as
    a snapshot and checked, in sorted name order, for a copy of the file below
    the ``boot_volume`` folder; the live file itself is checked last.  A copy
    counts as a new version when its modification time (to the second) has
    not been seen on an earlier copy.

    Args:
        path: path of the file to look up.  A relative path is resolved
            against the current directory before it is mapped into a snapshot.

    Returns:
        A list of single-entry dicts ``{mod_time: file_path}``, in the order
        found, where ``mod_time`` is a local-time ``time.struct_time``.
    """
    print('looking for versions of %s' % path)
    # Inside a snapshot the file lives at
    # <snapshot>/<boot volume>/<absolute path without its leading slash>.
    relative = os.path.abspath(path).lstrip(os.sep)
    candidates = [os.path.join(time_machine_path, snapshot, boot_volume, relative)
                  for snapshot in sorted(os.listdir(time_machine_path))]
    candidates.append(path)

    versions = []
    seen_times = set()
    for candidate in candidates:
        if verbose:
            print('\nLooking for modified version in %s' % candidate)
        if not os.path.exists(candidate):
            continue
        if verbose:
            print('File Exists')
        mod_time = time.localtime(os.path.getmtime(candidate))
        if mod_time in seen_times:
            # Same timestamp as an earlier copy: the file did not change.
            continue
        if verbose:
            print('****************** unique version **********************')
        seen_times.add(mod_time)
        versions.append({mod_time: candidate})
    return versions
    
def getTMLocation(volumes_root='/Volumes'):
    """Make sure the module-level ``time_machine_path`` points at a backup folder.

    If ``time_machine_path`` is already set to an existing path it is kept.
    Otherwise each mounted volume under ``volumes_root`` is checked (in sorted
    order) for a ``Backups.backupdb`` folder, and the first host folder inside
    the first such volume is stored in ``time_machine_path``.

    NOTE: the host folder is not matched against this computer's name; the
    first one found is used.

    Args:
        volumes_root: directory whose entries are the mounted volumes.

    Returns:
        True when ``time_machine_path`` is usable, False when no backup
        folder could be found.
    """
    global time_machine_path
    if time_machine_path and os.path.exists(time_machine_path):
        return True
    try:
        volumes = sorted(os.listdir(volumes_root))
    except OSError:
        # The mount directory is missing or unreadable, so there is nothing
        # to search.
        return False
    for volume in volumes:
        backupdb = os.path.join(volumes_root, volume, 'Backups.backupdb')
        if not os.path.isdir(backupdb):
            continue
        try:
            entries = sorted(os.listdir(backupdb))
        except OSError:
            continue
        # Skip hidden files such as .DS_Store and anything that is not a
        # folder, so that only real host folders are considered.
        hosts = [name for name in entries
                 if not name.startswith('.')
                 and os.path.isdir(os.path.join(backupdb, name))]
        if hosts:
            time_machine_path = os.path.join(backupdb, hosts[0])
            return True
    return False
    
def _render_version_diffs(differ, versions):
    """Return the HTML diff tables for each consecutive pair in ``versions``."""
    stamp_format = "%m/%d/%Y %I:%M:%S %p"
    parts = []
    for older, newer in zip(versions, versions[1:]):
        # Each entry is a single-item dict {mod_time: file_path}.
        old_time, old_path = list(older.items())[0]
        new_time, new_path = list(newer.items())[0]
        parts.append('<h2>changes from %s to %s</h2>' % (
            time.strftime(stamp_format, old_time),
            time.strftime(stamp_format, new_time)))
        with open(old_path) as handle:
            old_lines = handle.readlines()
        with open(new_path) as handle:
            new_lines = handle.readlines()
        parts.append(differ.make_table(old_lines, new_lines,
                                       context=True, numlines=3))
    return ''.join(parts)


def main(argv=None):
    """Build an HTML report of the changes between backed-up file versions.

    Options are parsed first, so ``-h``/``--help`` works even when no backup
    drive is mounted.  ``-v`` turns on verbose output and ``-o``/``--output``
    chooses the report file (default ``/tmp/diff.html``).  Every remaining
    argument is a file path, taken relative to the current directory.  The
    finished report is written and then opened with ``webbrowser``.

    Args:
        argv: full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        None on success, 2 after a usage error, 1 when no Time Machine
        backup could be located.
    """
    global verbose
    if argv is None:
        argv = sys.argv
    output = '/tmp/diff.html'
    try:
        try:
            opts, args = getopt.getopt(argv[1:], "ho:v", ["help", "output="])
        except getopt.error as msg:
            raise Usage(msg)

        # option processing
        for option, value in opts:
            if option == "-v":
                verbose = True
            if option in ("-h", "--help"):
                raise Usage(help_message)
            if option in ("-o", "--output"):
                output = value
        if not args:
            raise Usage(help_message)
    except Usage as err:
        sys.stderr.write('%s: %s\n' % (os.path.basename(argv[0]), err.msg))
        sys.stderr.write('\t for help use --help\n')
        return 2

    if not getTMLocation():
        sys.stderr.write('No Time Machine Backup Found\n')
        return 1
    print('time machine path: ' + time_machine_path)

    differ = difflib.HtmlDiff(tabsize=4)
    html = [header]
    for path in args:
        path = os.path.join(os.getcwd(), path)
        html.append('<h1>Changes for %s</h1>' % os.path.basename(path))
        versions = find_versions(path)
        if len(versions) < 2:
            html.append('<h2>Less than 2 Versions found on Time Machine Backup</h2>')
        else:
            html.append('<h2>%s versions found</h2>' % len(versions))
        html.append(_render_version_diffs(differ, versions))
    html.append(footer)

    with open(output, 'w') as report:
        report.write(''.join(html))
    import webbrowser
    # An absolute path keeps the browser from resolving a relative -o value
    # against some other directory.
    webbrowser.open(os.path.abspath(output))


# Script entry point: run main() and use whatever it returns as the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main())
In the unlikely event that I ever have time, this would be a cool PyObjC project -- add a file browser panel, a date/version picker, and a WebKit view (with some better CSS).

[robg adds: To use this script, save the file (filecomp.py or whatever), make it executable (chmod a+x filecomp.py), and then run it in Terminal, passing the path to a text file as an argument. Assuming you saved it somewhere on your path, that would look something like: filecomp.py ~/Documents/myfile.txt. It seemed to work in my testing, though thanks to a recent mistake on my part, I don't have much Time Machine history to dig through.]

Comments (4)


Mac OS X Hints
http://hints.macworld.com/article.php?story=20090223172046475