diff --git a/noblogsmv/main.py b/noblogsmv/main.py index f7b5e8c0b509592a3a3496edfe2e5e8c045fa2c2..cfa794d823393575a215ed3ecc3267ba8d6a11f1 100644 --- a/noblogsmv/main.py +++ b/noblogsmv/main.py @@ -4,10 +4,11 @@ import os import re import subprocess import sys +import collections from noblogsmv import state from noblogsmv import webapp - +from functools import partial BLOGS_DIR = '/opt/noblogs/www/wp-content/blogs.dir' MYSQL_URI_RE = re.compile( @@ -16,9 +17,54 @@ MYSQL_URI_RE = re.compile( log = logging.getLogger(__name__) +def execute_gen(cmd, dry_run = False): + if dry_run: + log.info("[DR] %s" % cmd) + return '' + return subprocess.check_output(cmd) + + + def init(blog_id, value, progress): return 'move_data' +def init_clean(blog_id, value, progress): + return 'rm_data' + + +class MysqlNoblogs(object): + conn = collections.namedtuple('DbData', + ['host', 'port', 'user', 'password', 'db']) + + def __init__(self, old_host,old_port,old_user,old_pass,old_db, + new_host,new_port,new_user,new_pass,new_db,**kwd): + self.old = self.conn(old_host,old_port,old_user,old_pass, old_db) + self.new = self.conn(new_host,new_port,new_user,new_pass, new_db) + + + def options(self, conn): + return '-h%s -P%s -u%s -p%s' % (conn.host,conn.port, + conn.user,conn.password) + + @property + def old_options(self): + return self.options(self.old) + + @property + def new_options(self): + return self.options(self.new) + + def find_tables(self, blog_id): + find_tables =['mysql', self.options(self.old), self.old.db, '-NBe', + '\\"show tables like \'wp\\_%s\\_%%\'\\"' % blog_id] + cmd = ['ssh', 'root@%s' % self.old.host, + '"%s"' % ' '.join(find_tables)] + log.debug('Calling %s', ' '.join(cmd)) + raw_tables = subprocess.check_output(cmd) + self.tables = [ t.strip() for t in raw_tables.splitlines()] + + + def process_move_data(blog_id, value, progress): progress.update('moving blog data') @@ -28,7 +74,7 @@ def process_move_data(blog_id, value, progress): '%s/' % blog_dir, 'root@%s:%s' % (value['new_host'], blog_dir)] log.debug('Calling %s', ' '.join(cmd)) try: - subprocess.check_call(cmd) + execute(cmd) except (subprocess.CalledProcessError, OSError), e: progress.update('moving blog data failed, retrying') log.info('Could not move data for blog %s: %s', blog_id, e) @@ -42,58 +88,108 @@ def process_move_data(blog_id, value, progress): def process_move_db(blog_id, value, progress): progress.update('moving blog database') log.info('moving database for %s', blog_id) - old_mysql_opts = '-h%s -P%s -u%s -p%s' % (value['old_host'], - value['old_port'], - value['old_user'], - value['old_pass']) - new_mysql_opts = '-h%s -P%s -u%s -p%s' % (value['new_host'], - value['new_port'], - value['new_user'], - value['new_pass']) - find_tables = ['mysql', old_mysql_opts, value['old_db'], '-NBe', - '\\"show tables like \'wp\\_%s\\_%%\'\\"' % blog_id] - cmd = ['ssh', 'root@%s' % value['old_host'], '"%s"' % ' '.join(find_tables)] - log.debug('Calling %s', ' '.join(cmd)) + db = MysqlNoblogs(**value) try: - raw_tables = subprocess.check_output(cmd) + db.find_tables(blog_id) except (subprocess.CalledProcessError, OSError), e: progress.update('could not look up blog tables, retrying') log.info('Could not look up tables for blog %s: %s', blog_id, e) return None - tables = ' '.join(t.strip() for t in raw_tables.splitlines()) - move_db = ['mysqldump', '--opt', old_mysql_opts, value['old_db'], tables, - '|', 'mysql', new_mysql_opts, value['new_db']] + + if not db.tables: + log.info('Could not find any table for blog %s on host %s', + blog_id, value['old_host']) + #Not sure this is the best option: if we have *no* tables, + #we should probably not retry, but declare the move failed for good. + return state.STATE_ERROR + + tables = ' '.join(db.tables) + move_db = ['mysqldump', '--opt', db.old_options, value['old_db'], tables, + '|', 'mysql', db.new_options, value['new_db']] cmd = ['ssh', 'root@%s' % value['old_host'], '"%s"' % ' '.join(move_db)] log.debug('Calling %s', ' '.join(cmd)) try: - raw_tables = subprocess.check_output(cmd) + execute(cmd) except (subprocess.CalledProcessError, OSError), e: progress.update('could not move blog database, retrying') log.info('Could not move blog %s database: %s', blog_id, e) return None - return 'rm' + return state.STATE_DONE -def process_rm(blog_id, value, progress): +def process_rm_db(blog_id, value, progress): progress.update('removing blog in old location') log.info('removing data of blog %s from %s', blog_id, value['old_server_id']) + + db = MysqlNoblogs(**value) + try: + db.find_tables(blog_id) + except (subprocess.CalledProcessError, OSError), e: + progress.update('could not look up blog tables, retrying') + log.info('Could not look up tables for blog %s: %s', blog_id, e) + return None + + if not db.tables: + return state.STATE_DONE + + tables = ', '.join(db.tables) + drop_db = ['mysql', db.old_options, db.old.db, '-NBe', + '\\"DROP TABLE %s\\"' % tables] + cmd = ['ssh', 'root@%s' % value['old_host'], '"%s"' % ' '.join(drop_db)] + try: + execute(cmd) + except (subprocess.CalledProcessError, OSError), e: + progress.update('could not remove old blog database, retrying') + log.info('Could not remove blog %s database: %s', blog_id, e) + return None + progress.update('removed blog db; Done.') return state.STATE_DONE +def process_rm_data(blog_id, value, progress): + progress.update('removing blog data files from old location') + log.info('Removing blog data files') + blog_dir = os.path.join(BLOGS_DIR, blog_id) + cmd = ['ssh', 'root@%s' % value['old_host'], 'rm', '-rf', + '%s/' % blog_dir] + log.debug('Calling %s', ' '.join(cmd)) + try: + execute(cmd) + except (subprocess.CalledProcessError, OSError), e: + progress.update('removing old blog data failed, retrying') + log.info('Could not remove data for blog %s: %s', blog_id, e) + return None + + progress.update('moved blog data') + log.info('Moved data for blog %s', blog_id) + return 'rm_db' + class NoblogsStateMachine(state.StateMachine): + pass + +class NoblogsMoveStateMachine(NoblogsStateMachine): states = { 'error': state.nop, 'init': init, 'move_data': process_move_data, 'move_db': process_move_db, - 'rm': process_rm, 'done': state.nop, - } + } +class NoblogsCleanStateMachine(NoblogsStateMachine): + + states = { + 'error': state.nop, + 'init': init_clean, + 'rm_data': process_rm_data, + 'rm_db': process_rm_db, + 'done': state.nop + } + def main(): parser = optparse.OptionParser(usage='''%prog [<OPTIONS>] @@ -117,11 +213,15 @@ use the `--recover' option. parser.add_option('--num-workers', dest='num_workers', type='int', default=20, help='Number of workers (default: %default)') + parser.add_option('--dry-run', dest='dry_run', action='store_true', + default=False, help='Do not really perform the migration.') parser.add_option('--recover', dest='recover', action='store_true', help='Recover a previous run') parser.add_option('--noexit', action='store_true', help='Do not exit when done') parser.add_option('--debug', action='store_true') + parser.add_option('--clean', action='store_true', + help='Clean up instead of moving blogs.') opts, args = parser.parse_args() if len(args) != 0: @@ -130,10 +230,18 @@ use the `--recover' option. logging.basicConfig( level=(logging.DEBUG if opts.debug else logging.INFO)) + global execute + execute = partial(execute_gen, dry_run = opts.dry_run) + + if opts.clean: + statemachine = NoblogsCleanStateMachine + else: + statemachine = NoblogsMoveStateMachine + inputfd = None if not opts.recover: inputfd = sys.stdin - sm, httpserver = NoblogsStateMachine.new_with_fd( + sm, httpserver = statemachine.new_with_fd( opts.state_file, inputfd=inputfd, num_workers=opts.num_workers, diff --git a/noblogsmv/noblogs_map_diff.py b/noblogsmv/noblogs_map_diff.py index 0a8552f48e45cd598f2e2b8eea755c5f87502372..8dde0df3cb863a3d3293a37fdf28130049f4be10 100644 --- a/noblogsmv/noblogs_map_diff.py +++ b/noblogsmv/noblogs_map_diff.py @@ -55,7 +55,7 @@ def generate_diff_from_maps(old_map, old_backend, new_map, new_backend): 'old_server_id': old_bid, } diff[blog_id].update(_backend_params(new_backend, new_bid, 'new')) - diff[blog_id].update(_backend_params(old_backend, old_bid, 'new')) + diff[blog_id].update(_backend_params(old_backend, old_bid, 'old')) log.info('%d total blogs, %d need to be moved (%.2g%%)', len(new_blogs), len(diff), float(len(diff)) / len(new_blogs)) @@ -76,7 +76,7 @@ def read_backends(filename): return None b = {} bid, b['user'], b['pass'], b['host'], b['port'], b['db'] = match.groups() - backends['backend_%d' % bid] = b + backends['backend_%d' % int(bid)] = b return backends diff --git a/noblogsmv/state.py b/noblogsmv/state.py index 5f4b91d881ad0d6ef20863c8387f0d309d59efb3..3c5155f268cdea2a43d1f47163b0649381ec1e58 100644 --- a/noblogsmv/state.py +++ b/noblogsmv/state.py @@ -363,4 +363,4 @@ class StateMachine(object): def write_work_unit(fd, key, data): - fd.write('%s %s\n', key, json.dumps(data)) + fd.write('%s %s\n' % (key, json.dumps(data))) diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 9a4642ec0dedf3c08ccacc864d9b3d0ae2801776..14811eed154deb8e5945487f3e0283f7c3f0e732 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ setup( setup_requires=[], zip_safe=True, packages=find_packages(), - package_data={"noblogsmv": ["templates/*", "static/*"]}, + package_data={"noblogsmv": ["templates/*", "static/img/*", "static/css/*", "static/js/*"]}, entry_points={ "console_scripts": [ "noblogs-map-diff = noblogsmv.noblogs_map_diff:main", @@ -21,4 +21,3 @@ setup( ], }, ) -