#! /usr/bin/python
##############################################################################################
## Spotlight Backup 0.1
## Alden Stradling who can be found at cern ch (anti-spam)
## 24 August 2005
##
## Requirements: Tiger (Mac OS 10.4) and an account on a unix/osx machine with
## incoming ssh capability (and adequate space).  Runs under Python 2.6+ and Python 3.
##
## Can be run as a cron job as well.
##
## Installation -
##
## Save as backup.py (OK, it doesn't matter, but that's the convention I use here.)
## Run as:
##   > python backup.py
##
## or
##
##   > chmod u+x backup.py
##   > ./backup.py
##
## This is a simple-minded copy scheme. There are cleverer ways of grouping files for grouped
## transfers - I haven't bothered with that as of yet. You can feel free to try it out.
## The design, in principle, will not put much load on your machine.
##
## Be aware that Spotlight might not see things modified in the last half-hour or so (in
## general - YMMV), so this might be run after-hours as a cron job.
##
## Be aware that the backup is incremental, and overwrites previous files of the same name.
##
## Be aware that this does not clean up after itself on the server - if files are deleted on
## your local machine, the change will not be reflected on the server. This is great for
## archival reasons, but is a problem if you have space limitations.
##
## Be aware that this is not a bootable backup. This is to preserve your important files with
## a minimum of fuss.
##
## The exit status is 0 when every mkdir/scp command succeeded and 1 otherwise.
##
## This script is for use at your own risk. I think it works, but I've been known to be wrong.
##############################################################################################

import os
import subprocess
import sys
import time

# USER SET: backupserver and backupdirectory.
# The server should be one on which you have an account. The directory will be the equivalent
# of your /Users/ directory, copied onto the server. Use an absolute path.
# To use this conveniently, you should have batch mode connection capability between your
# machine and your server.
# Follow the directions at http://www.atmos.albany.edu/facstaff/rmctc/ssh2/ to gain this
# capability. If you require a password to establish the connection, you will have to be there
# at the time of backup to enter it. For this reason, I suggest you enter no password when
# doing the keygen, and do this between your computer and a purpose-built, low-privilege
# account on your backup server - that minimizes your security risk.
backserver = 'your.server.com'
backdir = '/Your/backup/path'

home = os.environ.get('HOME') or os.path.expanduser('~')

# A list of the files last transferred and a list of the folders that exist on the remote
# server are maintained in these two files.
backfile = '%s/.backuplist' % home
backfolders = '%s/.backupfolders' % home

# Any of these are valid time units, and any of them (except yesterday) can be given an
# argument as seen on today.
#unit='now(-800)'   # 800 seconds ago
#unit='today'       # today
unit = 'today(-3)'  # 3 days past
#unit='yesterday'   # yesterday
#unit='this_week'   # this week
#unit='this_month'  # this month
#unit='this_year'   # this year

# This is the size limit imposed for files to transfer. This can be set arbitrarily high.
size = '8000000'

# Required in the path to be backed up
yes_l = [home]

# Items forbidden in the file path
no_l = ['.Trash', 'Library/Caches', 'Library/Safari', 'Library/Autosave', 'Library/Favorites',
        'Library/Recent Servers', 'Library/Cookies', '.collection']

# ASCII characters that can be handed to a remote shell without escaping.
SHELL_SAFE_CHARS = frozenset(
    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_@%+=:,./-~')


def select_paths(paths, required, forbidden):
    """Return the paths containing every `required` fragment and no `forbidden` one.

    Each path is judged exactly once, so a path that trips several criteria is simply
    dropped (the original removed it from the list once per hit and crashed on the
    second removal).  Input order is preserved.
    """
    kept = []
    for path in paths:
        if not all(fragment in path for fragment in required):
            continue
        if any(fragment in path for fragment in forbidden):
            continue
        kept.append(path)
    return kept


def drop_parent_entries(paths):
    """Return `paths` without the entries that are the direct parent of another entry.

    Spotlight reports a modified folder as well as the files inside it; the folder entry
    itself is useless for a file copy.  The same pruning reduces a remote directory
    listing to its deepest folders.
    """
    parents = set(os.path.dirname(path) for path in paths)
    return [path for path in paths if path not in parents]


def remote_path(local_path, local_root, remote_root):
    """Map `local_path` onto the server by swapping the first `local_root` for `remote_root`.

    Only the first occurrence is replaced so that a path which happens to contain the
    home directory string a second time is not mangled.
    """
    return local_path.replace(local_root, remote_root, 1)


def listed_folders(listing, root):
    """Extract the directory names from recursive `ls` output.

    Directory headers are the lines of the form "<path>:"; only those starting with
    `root` are returned, without the trailing colon.
    """
    folders = []
    for line in listing.splitlines():
        if line.startswith(root) and line.endswith(':'):
            folders.append(line[:-1])
    return folders


def missing_folders(wanted, existing):
    """Return the `wanted` folders that are not known to exist on the server.

    A wanted folder exists if it equals an existing folder or is an ancestor of one.
    The ancestor test is done on whole path components, so "/b/foo" is not considered
    present merely because "/b/foobar" is.
    """
    missing = []
    for folder in wanted:
        as_ancestor = folder.rstrip(os.sep) + os.sep
        for known in existing:
            if known == folder or known.startswith(as_ancestor):
                break
        else:
            # Only reported when it made it through the whole list unmatched.
            missing.append(folder)
    return missing


def remote_escape(path):
    """Backslash-escape every ASCII character of `path` that a remote shell could interpret.

    Spaces are common in OS X filenames, but quotes, `$`, `;` and friends occur too.
    Non-ASCII characters are left untouched.
    """
    pieces = []
    for char in path:
        if ord(char) < 128 and char not in SHELL_SAFE_CHARS:
            pieces.append('\\')
        pieces.append(char)
    return ''.join(pieces)


def capture_output(command):
    """Run `command` (an argument list, no local shell) and return its standard output."""
    process = subprocess.Popen(command, stdout=subprocess.PIPE, universal_newlines=True)
    return process.communicate()[0]


def run(command):
    """Echo and run `command` (an argument list); return True when it exits with status 0."""
    print(' '.join(command))
    return subprocess.call(command) == 0


def main():
    """Back up recently changed files below `home` to `backserver`:`backdir`.

    Returns 0 when every remote command succeeded, 1 otherwise.
    """
    # Spotlight gets the qualifying items from the home directory.
    query = '( kMDItemFSContentChangeDate > $time.%s && kMDItemFSSize < %s)' % (unit, size)
    found = capture_output(['mdfind', '-onlyin', home, query]).split('\n')

    # Files that do not qualify are dropped, then useless folder entries are removed so
    # as not to waste time.
    files = drop_parent_entries(sorted(select_paths(found, yes_l, no_l)))

    with open(backfile, 'w') as handle:
        handle.write('%s\n' % str(time.time()))
        handle.write('\n'.join(files))

    transfers = [(path, remote_path(path, home, backdir)) for path in files]

    # Gets the directory structure on the server.  ssh hands the command to the remote
    # shell, so the path is escaped.  A failure here (e.g. first run, nothing on the
    # server yet) just means every folder is treated as missing.
    listing = capture_output(['ssh', backserver, 'ls -R ' + remote_escape(backdir)])
    existing = drop_parent_entries(listed_folders(listing, backdir))

    # Folders needed on the server, each listed once.
    wanted = []
    seen = set()
    for _, target in transfers:
        parent = os.path.dirname(target)
        if parent not in seen:
            seen.add(parent)
            wanted.append(parent)

    new_folders = missing_folders(wanted, existing)

    with open(backfolders, 'w') as handle:
        for folder in sorted(existing + new_folders):
            handle.write(folder.strip() + '\n')

    failures = 0

    # Makes the necessary folders on the server.
    for folder in new_folders:
        if not run(['ssh', backserver, 'mkdir -p ' + remote_escape(folder)]):
            failures += 1

    # Copies the files.
    # NOTE(review): the remote path is escaped because scp traditionally passes it through
    # the remote shell.  OpenSSH 9.0+ reportedly defaults to SFTP transfers, where the path
    # is taken literally - confirm against the installed scp; names with special
    # characters may then need `scp -O`.
    for source, target in transfers:
        if not run(['scp', source, '%s:%s' % (backserver, remote_escape(target))]):
            failures += 1

    if failures:
        sys.stderr.write('%d remote command(s) failed\n' % failures)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())