Commit 067d6202 authored by Joe's avatar Joe Committed by lechuck

hyperdb: simplify the stack

Right now we don't need consistent hashing for the database, as each blog is
supposed to be local to the server that hosts it. So we're left with two datasets:
the global one and the local one.

The consistent hashing is still used to generate the blogs map for nginx, and
the script has been modified accordingly.
Finally, all r2db tools that are no longer used are being removed, and the one
left (get-backends.php) will be converted in a future commit. Also, some tools in
noblogscli should be adapted accordingly.
Signed-off-by: default avatarJoe <joe@incal.net>
parent 4c204612
<?php
// Common settings applied to the shared $wpdb object.
// NOTE(review): the same two assignments appear again further down in this
// listing; confirm which copy is meant to stay.
$wpdb->persistent = true;
$wpdb->max_connections = 30;
/**
 * Read a database URL from a file and split it into its components.
 *
 * The file content is trimmed and handed to parse_url(), so the result uses
 * parse_url()'s keys (scheme, host, port, user, pass, path, ...).
 *
 * @param string $file Path of a file containing a single database URL.
 * @return array The URL components.
 * @throws RuntimeException When the file cannot be read or the URL cannot be parsed,
 *                          instead of silently returning an empty/false result.
 */
function noblogs_split_db($file) {
    $contents = @file_get_contents($file);
    if ($contents === false) {
        throw new RuntimeException("Cannot read database configuration file: " . $file);
    }
    $parts = parse_url(trim($contents));
    if ($parts === false) {
        throw new RuntimeException("Malformed database URL in: " . $file);
    }
    return $parts;
}
/**
 * Register the "global" dataset on $wpdb.
 *
 * @param string $master_file File holding the master database URL
 *                            (parsed by noblogs_split_db()).
 * @param string $ip_file     File whose trimmed content is compared with the
 *                            master host to tell whether this node is the master.
 * @return void
 */
function noblogs_load_global_dataset($master_file, $ip_file) {
    global $wpdb;

    $master = noblogs_split_db($master_file);
    $local_ip = trim(file_get_contents($ip_file));

    $entry = array(
        "host" => $master["host"] . ":" . $master["port"],
        "user" => $master["user"],
        "password" => $master["pass"],
        "name" => substr($master["path"], 1),
        "dataset" => "global",
        "read" => 1,
        "write" => 1,
        "timeout" => 2
    );

    // On the master a single read/write entry is enough.
    if ($master['host'] == $local_ip) {
        $wpdb->add_database($entry);
        return;
    }

    // On every other node: the master is registered write-only ...
    $master_entry = $entry;
    $master_entry['read'] = 0;
    $wpdb->add_database($master_entry);

    // ... and the local replica (same port as the master) read-only.
    $replica_entry = $entry;
    $replica_entry['host'] = $local_ip . ":" . $master["port"];
    $replica_entry['write'] = 0;
    $wpdb->add_database($replica_entry);
}
/**
 * Dataset-selection callback: route per-blog tables to "local", the rest to "global".
 *
 * A table counts as per-blog when its name starts with the base prefix followed
 * by a numeric blog id and an underscore. The prefix is passed through
 * preg_quote() so that regex metacharacters in it are matched literally.
 *
 * @param string $query The query being routed (not inspected).
 * @param object $wpdb  Object exposing the base_prefix and table properties.
 * @return string "local" or "global".
 */
function noblogs_dataset($query, $wpdb) {
    $pattern = '/^' . preg_quote($wpdb->base_prefix, '/') . '\d+_/';
    if (preg_match($pattern, $wpdb->table)) {
        return "local";
    }
    return "global";
}
/**
 * AI custom: append every query to a per-day log file for debugging.
 *
 * The log lives under ABSPATH . '/profiling/' and is named after the current
 * date. If the file cannot be opened, nothing is written.
 *
 * @param mixed   $query     The query text.
 * @param mixed   $time      The query time, written verbatim to the log.
 * @param mixed   $backtrace Passed through untouched.
 * @param hyperdb $obj       Unused by this function.
 * @return array array($query, $time, $backtrace), unchanged.
 */
function ai_log_db_queries($query, $time, $backtrace=null, hyperdb $obj ) {
    $passthrough = array($query, $time, $backtrace);

    $log_path = ABSPATH . '/profiling/noblogs_queries_'. date('Ymd') . '.log';
    $handle = fopen($log_path, 'a');
    if ($handle) {
        $entry = sprintf("##\n#Date: %s\n#Query time: %s\n%s\n", date('r'), $time, $query);
        fwrite($handle, $entry);
        fclose($handle);
    }

    return $passthrough;
}
/* Disable the responsiveness check: it is not needed in our configuration and
its faulty implementation is causing connection errors. */
// Database topology for this node: a "global" dataset loaded from the master
// configuration, a single "local" dataset for the blog tables, and a callback
// that picks between the two.
$wpdb->check_tcp_responsiveness = false;
// NOTE(review): these includes and the noblogs_load_backends() call below still
// use the r2db hashing helpers and $wpdb->hash_map; confirm they are still
// wanted now that blog tables are routed to one "local" dataset.
include_once('r2db/db-hash.php');
include_once('r2db/db-backends.php');
// Set up global dataset with master databases
noblogs_load_global_dataset(NOBLOGS_MASTER_CONFIG, NOBLOGS_HOST_FILE);
// Add all the sharded blog databases.
$wpdb_reverse_backend_map = noblogs_load_backends(NOBLOGS_BACKEND_CONFIG, $wpdb->hash_map);
// Add all the blog databases. Note that if the partition function sends
// traffic to the wrong backend, the backend will not be able to route db requests
// which is acceptable since we already assume this in a thousand places.
// NOTE(review): the global dataset encodes its port inside "host" (host:port),
// while this entry uses a separate "port" key; confirm hyperdb honours it.
$localdb = array(
"host" => "localhost",
"port" => "3307", # TODO: check if this works on staging?
"user" => NOBLOGS_LOCAL_DB_USER,
"password" => NOBLOGS_LOCAL_DB_PASSWORD,
"name" => "noblogs_local",
"dataset" => "local",
"read" => 1,
"write" => 1,
"timeout" => 10
);
$wpdb->add_database($localdb);
// Now add a simple callback that will allow us to choose between the
// local and global datasets
$wpdb->add_callback('noblogs_dataset');
// Common settings
// NOTE(review): these two assignments also appear near the top of this listing.
$wpdb->persistent = true;
$wpdb->max_connections = 30;
// Optional query profiling: when AI_DB_PROFILER is defined and strictly true,
// ask $wpdb to keep its queries and register ai_log_db_queries as the
// save-query callback.
// The defined() guard keeps an installation that never declares the constant
// from failing on the bare constant reference (an Error on PHP 8, a
// notice/warning on older versions).
if ( defined('AI_DB_PROFILER') && AI_DB_PROFILER === true ) {
    $wpdb->save_queries = true;
    $wpdb->save_query_callback = 'ai_log_db_queries';
}
<?php
/**
 * Build a Flexihash ring with one "backend_<id>" target per configured server.
 *
 * The configuration file is read line by line; blank lines and lines starting
 * with '#' are skipped, and the first space-separated field of every other
 * line is used as the server id.
 *
 * Terminates the script with die() when the file cannot be opened.
 *
 * @param string $config_file Path of the backends configuration file.
 * @return Flexihash The populated ring.
 */
function noblogs_load_backends($config_file) {
    require_once(dirname(__FILE__) . '/flexihash.php');
    require_once( ABSPATH . 'wp-config.php' );

    $ring = new Flexihash(null, R2DB_FLEXIHASH_REPLICAS);

    $handle = @fopen($config_file, "r");
    if (!$handle) {
        die("Backends not configured!");
    }

    for ($raw = fgets($handle, 1024); $raw !== false; $raw = fgets($handle, 1024)) {
        $entry = rtrim($raw);
        if ($entry === "" || $entry[0] === '#') {
            continue;
        }
        $fields = explode(" ", $entry);
        $ring->addTarget("backend_" . $fields[0]);
    }

    fclose($handle);
    return $ring;
}
<?php
/**
 * Read a database URL from a file and split it into its components.
 *
 * The file content is trimmed and handed to parse_url(), so the result uses
 * parse_url()'s keys (scheme, host, port, user, pass, path, ...).
 *
 * @param string $file Path of a file containing a single database URL.
 * @return array The URL components.
 * @throws RuntimeException When the file cannot be read or the URL cannot be parsed,
 *                          instead of silently returning an empty/false result.
 */
function noblogs_split_db($file) {
    $contents = @file_get_contents($file);
    if ($contents === false) {
        throw new RuntimeException("Cannot read database configuration file: " . $file);
    }
    $parts = parse_url(trim($contents));
    if ($parts === false) {
        throw new RuntimeException("Malformed database URL in: " . $file);
    }
    return $parts;
}
/**
 * Register every configured backend database and return them keyed by dataset.
 *
 * Each non-blank, non-comment ('#') line of the configuration file is split on
 * single spaces: field 0 is the server id and field 2 is the backend database
 * URL. For every such line the backend is added to the global $wpdb, its
 * dataset name ("backend_<id>") is added as a target on $hashptr, and the
 * backend descriptor is stored in the returned map.
 *
 * Terminates the script with die() when the file cannot be opened.
 *
 * @param string $db_config_file Path of the backends configuration file.
 * @param object $hashptr        Object exposing addTarget().
 * @return array Map of dataset name => backend descriptor.
 */
function noblogs_load_backends($db_config_file, $hashptr) {
    global $wpdb;

    $handle = @fopen($db_config_file, "r");
    if (!$handle) {
        die("Database backends not configured!");
    }

    $backends_by_dataset = array();
    for ($raw = fgets($handle, 1024); $raw !== false; $raw = fgets($handle, 1024)) {
        $entry = rtrim($raw);
        if ($entry === "" || $entry[0] === '#') {
            continue;
        }

        $fields = explode(" ", $entry);
        $dataset_name = "backend_" . $fields[0];
        $url = parse_url($fields[2]);

        $descriptor = array(
            "host" => $url["host"] . ":" . $url["port"],
            "user" => $url["user"],
            "password" => $url["pass"],
            "name" => substr($url["path"], 1),
            "dataset" => $dataset_name,
            "read" => 1,
            "write" => 1,
            "timeout" => 10
        );

        $wpdb->add_database($descriptor);
        $hashptr->addTarget($dataset_name);
        $backends_by_dataset[$dataset_name] = $descriptor;
    }

    fclose($handle);
    return $backends_by_dataset;
}
/**
 * Register the "global" dataset on $wpdb.
 *
 * When the master host equals the trimmed content of $ip_file, one read/write
 * entry is added. Otherwise two entries are added: the master with read
 * disabled, and the host named in $ip_file (on the master's port) with write
 * disabled.
 *
 * @param string $master_file File holding the master database URL.
 * @param string $ip_file     File holding this node's address.
 * @return void
 */
function noblogs_load_global_dataset($master_file, $ip_file) {
    global $wpdb;

    $master = noblogs_split_db($master_file);
    $local_ip = trim(file_get_contents($ip_file));

    $entry = array(
        "host" => $master["host"] . ":" . $master["port"],
        "user" => $master["user"],
        "password" => $master["pass"],
        "name" => substr($master["path"], 1),
        "dataset" => "global",
        "read" => 1,
        "write" => 1,
        "timeout" => 2
    );

    if ($master['host'] == $local_ip) {
        $wpdb->add_database($entry);
        return;
    }

    $master_entry = $entry;
    $master_entry['read'] = 0;
    $wpdb->add_database($master_entry);

    $replica_entry = $entry;
    $replica_entry['host'] = $local_ip . ":" . $master["port"];
    $replica_entry['write'] = 0;
    $wpdb->add_database($replica_entry);
}
<?php
// Load the Flexihash implementation that sits next to this file, and
// wp-config.php from ABSPATH.
require_once(dirname(__FILE__) . '/flexihash.php');
require_once( ABSPATH . 'wp-config.php' );
// Attach a Flexihash instance to $wpdb. R2DB_FLEXIHASH_REPLICAS is passed as
// the second constructor argument (presumably the replica count; confirm
// against flexihash.php).
$wpdb->hash_map = new Flexihash(null, R2DB_FLEXIHASH_REPLICAS);
/**
 * Hashing function to map blogs to databases.
 *
 * Implements a consistent hashing scheme using Flexihash: when the current
 * table name starts with the base prefix followed by a numeric blog id and an
 * underscore, the blog id is looked up in $wpdb->hash_map.
 *
 * The prefix is passed through preg_quote() so that regex metacharacters in it
 * are matched literally.
 *
 * @param string $query The query being routed (not inspected).
 * @param object $wpdb  Object exposing base_prefix, table and hash_map.
 * @return mixed The result of hash_map->lookup() for the blog id, or null when
 *               the table is not a per-blog table.
 */
function noblogs_db_callback($query, $wpdb) {
    $pattern = '/^' . preg_quote($wpdb->base_prefix, '/') . '(\d+)_/';
    if (preg_match($pattern, $wpdb->table, $matches)) {
        return $wpdb->hash_map->lookup($matches[1]);
    }
    return null;
}
// Register the hashing function above as a callback on $wpdb.
$wpdb->add_callback('noblogs_db_callback');
#!/opt/noblogs/cron/php-noblogs
<?php
// Load wordpress api.
// WP_CACHE is defined as false before WordPress is loaded.
define('WP_CACHE',false);
/** Setup WordPress environment */
require_once('wp-load.php');
// Database configuration, loaded explicitly after WordPress.
require_once('db-config.php');
/**
 * Legacy placement rule: even ids live on one server, odd ids on the other.
 *
 * NOTE(review): hosts and credentials are hard-coded here; consider moving
 * them to configuration.
 *
 * @param int $dbid Blog id.
 * @return array Connection parameters with keys host, port, user, password, db.
 */
function old_hash($dbid) {
    if (($dbid % 2) != 0) {
        return array(
            'host' => '172.16.1.8',
            'port' => '3307',
            'user' => 'noblogsusr',
            'password' => 'n0bl0gsdb4xpw!',
            'db' => 'noblogs'
        );
    }

    return array(
        'host' => '172.16.1.3',
        'port' => '3307',
        'user' => 'noblogs',
        'password' => 'n0bl0gst3st',
        'db' => 'noblogs_2'
    );
}
/**
 * Resolve a blog id to connection parameters using the hash map on $wpdb.
 *
 * The id is looked up in $wpdb->hash_map and the resulting key selects an
 * entry of $reversemap. The entry's "host" value is split into host and port
 * when it has the form "<host>:<digits>"; otherwise neither key is set.
 *
 * @param mixed $dbid       Blog id.
 * @param array $reversemap Map of lookup key => backend descriptor
 *                          (host, user, password, name).
 * @return array Parameters with keys host, port (when available), user, password, db.
 */
function new_hash($dbid, $reversemap) {
    global $wpdb;

    $backend = $reversemap[$wpdb->hash_map->lookup($dbid)];

    $endpoint = array();
    if (preg_match('/^(.*):([0-9]*)$/', $backend['host'], $host_port)) {
        $endpoint = array('host' => $host_port[1], 'port' => $host_port[2]);
    }

    return array_merge($endpoint, array(
        'user' => $backend['user'],
        'password' => $backend['password'],
        'db' => $backend['name']
    ));
}
/**
 * Format connection parameters as command-line options for the mysql tools.
 *
 * The password option is wrapped in single quotes for the shell. Single quotes
 * inside the password are escaped ('\''), so a password containing one no
 * longer breaks the generated command line. Passwords without single quotes
 * produce exactly the same output as before.
 *
 * @param array $attrs Parameters with keys host, port, user, password
 *                     (taken by reference, not modified).
 * @return string Options of the form "-h<host> -P<port> -u<user> '-p<password>'".
 */
function mysqlopts(&$attrs) {
    $quoted_password = "'-p" . str_replace("'", "'\\''", $attrs['password']) . "'";
    return ("-h" . $attrs['host'] . " -P" . $attrs['port'] . " -u" . $attrs['user']
        . " " . $quoted_password);
}
/**
 * Format connection parameters as a mysql:// URL (without the password).
 *
 * @param array $attrs Parameters with keys user, host, port, db
 *                     (taken by reference, not modified).
 * @return string URL of the form mysql://<user>@<host>:<port>/<db>.
 */
function mysqlurl(&$attrs) {
    $authority = $attrs['user'] . "@" . $attrs['host'] . ":" . $attrs['port'];
    return "mysql://" . $authority . "/" . $attrs['db'];
}
/**
 * Fetch id, domain and path of every blog with public = 0, ordered by domain and path.
 *
 * An earlier, commented-out variant of this query selected public,
 * non-deleted, non-archived blogs instead.
 *
 * @return mixed Whatever $wpdb->get_results() returns for the query.
 */
function get_all_blogs()
{
    global $wpdb;

    $blogs_table = $wpdb->blogs;
    return $wpdb->get_results(
        "SELECT blog_id, domain, path FROM " . $blogs_table . " WHERE public = 0 ORDER BY domain, path"
    );
}
// Main script: for every blog, compare its placement under the legacy rule
// (old_hash) with its placement under the hash map (new_hash) and print the
// shell commands that move its tables when the two differ.
//
// NOTE(review): new_hash() looks ids up in $wpdb->hash_map, not in the
// $hashmap populated here; confirm both are meant to hold the same targets.
$hashmap = new Flexihash();
$reverse_backend_map = noblogs_load_backends(NOBLOGS_BACKEND_CONFIG, $hashmap);

$new_counts = array();
$moved_count = 0;

$blogs = get_all_blogs();
foreach ($blogs as $blog) {
    $blog_id = $blog->blog_id;
    $old_params = old_hash($blog_id);
    $old_dburi = mysqlurl($old_params);
    $new_params = new_hash($blog_id, $reverse_backend_map);
    $new_dburi = mysqlurl($new_params);

    // Count blogs per destination host. The isset() check avoids reading an
    // undefined array key the first time a host is seen, which would otherwise
    // emit a warning into the generated shell script.
    $new_host = $new_params['host'];
    if (!isset($new_counts[$new_host])) {
        $new_counts[$new_host] = 0;
    }
    $new_counts[$new_host] += 1;

    if ($old_dburi != $new_dburi) {
        echo "echo moving blog $blog_id from " . $old_params['host'] . " to " . $new_params['host'] . "\n";
        // List the blog's tables on the old server, then pipe a dump of them
        // into the new server.
        echo "tables=\$(mysql " . mysqlopts($old_params) . " " . $old_params['db'] . " -NBe \"show tables like 'wp\\_" . $blog_id . "\\_%'\")\n";
        echo "mysqldump --opt " . mysqlopts($old_params) . " " . $old_params['db'] . " \${tables} \\\n";
        echo " | mysql " . mysqlopts($new_params) . " " . $new_params['db'] . "\n";
        $moved_count += 1;
    } else {
        echo "echo blog $blog_id stays on " . $old_params['host'] . "\n";
    }
}

echo "\n\n\nBlog distribution:\n";
print_r($new_counts);
print "\n $moved_count blogs moved\n";
#!/opt/noblogs/cron/php-noblogs
<?php
/*
Parse command line options
*/
// Three value-less long options are recognised.
$opts = getopt('',array('no-database', 'calc-size', 'db-only'));
// --calc-size implies --no-database and sets up the per-host size accumulator.
if (array_key_exists('calc-size', $opts)) {
$opts['no-database'] = true;
$g_added_size = array();
}
// The last command-line argument is the new topology file, the one before it
// the old topology file.
// NOTE(review): array_pop() does not tell options or the script name apart from
// positional arguments, so a single argument is paired with whatever precedes it.
$new_topology = array_pop($argv);
$old_topology = array_pop($argv);
if (!($new_topology && $old_topology)) {
usage();
exit(-1);
}
// Get all blogs
// WP_CACHE is defined as false before WordPress is loaded.
define('WP_CACHE',false);
require_once('wp-load.php');
require_once('db-config.php');
$blogs = get_all_blogs();
// Load each topology with its own Flexihash instance, so old and new
// placements can be compared.
$n_hashmap = new Flexihash();
$new_map = noblogs_load_backends($new_topology, $n_hashmap);
$o_hashmap = new Flexihash();
$old_map = noblogs_load_backends($old_topology, $o_hashmap);
// For every blog, compare its placement under the old and the new topology and
// print the commands needed to move it (database dump/restore and/or rsync of
// its blogs.dir directory), or accumulate the disk space it would add to the
// destination host when --calc-size is given.
//
// The counters are initialised explicitly so that the summary below is
// well-defined even when there are no blogs.
$new_counts = array();
$moved_count = 0;

foreach ($blogs as $blog) {
    $blog_id = $blog->blog_id;
    $old_params = fhash($blog_id, $old_map, $o_hashmap);
    $old_dburi = mysqlurl($old_params);
    $new_params = fhash($blog_id, $new_map, $n_hashmap);
    $new_dburi = mysqlurl($new_params);

    // Count blogs per destination host without reading undefined array keys.
    $new_host = $new_params['host'];
    if (!isset($new_counts[$new_host])) {
        $new_counts[$new_host] = 0;
    }
    $new_counts[$new_host] += 1;

    if ($old_dburi != $new_dburi) {
        echo "echo moving blog $blog_id from " . $old_params['host'] . " to " . $new_params['host'] . "\n";
        if ( !array_key_exists('no-database', $opts) ) {
            echo "tables=\$(mysql " . mysqlopts($old_params) . " " . $old_params['db'] . " -NBe \"show tables like 'wp\\_" . $blog_id . "\\_%'\")\n";
            echo "mysqldump --opt " . mysqlopts($old_params) . " " . $old_params['db'] . " \${tables} \\\n";
            echo " | mysql " . mysqlopts($new_params) . " " . $new_params['db'] . "\n";
        }
        if (!array_key_exists('db-only',$opts)) {
            if (array_key_exists('calc-size', $opts)) {
                // du runs on the local machine; an ssh-based variant targeting
                // the old host was left commented out in the original.
                $cmd = sprintf("du -sk /opt/noblogs/www/wp-content/blogs.dir/%d", $blog_id);
                // du prints "<kB>\t<path>". The (int) cast turns empty output
                // (e.g. a missing directory) into 0 instead of feeding a
                // non-numeric string to the addition.
                $du_fields = explode("\t", exec($cmd));
                if (!isset($g_added_size[$new_host])) {
                    $g_added_size[$new_host] = 0;
                }
                $g_added_size[$new_host] += (int) $du_fields[0];
            } else {
                printf("ssh root@%s rsync -avz --delete /opt/noblogs/www/wp-content/blogs.dir/%d root@%s:/opt/noblogs/www/wp-content/blogs.dir/\n", $old_params['host'], $blog_id, $new_params['host']);
            }
        }
        $moved_count += 1;
    } else {
        echo "echo blog $blog_id stays on " . $old_params['host'] . "\n";
    }
}

echo "\n\n\nBlog distribution:\n";
print_r($new_counts);
echo "\n $moved_count blogs moved\n";
if (array_key_exists('calc-size', $opts)) {
    echo "Variations in disk space for hosts (kB):\n";
    print_r($g_added_size);
}
/**
 * Resolve a blog id to connection parameters using the given hash ring.
 *
 * The id is looked up in $hashptr and the resulting key selects an entry of
 * $reversemap. The entry's "host" value is split into host and port when it
 * has the form "<host>:<digits>"; otherwise neither key is set.
 *
 * @param mixed  $dbid       Blog id.
 * @param array  $reversemap Map of lookup key => backend descriptor
 *                           (host, user, password, name).
 * @param object $hashptr    Object exposing lookup().
 * @return array Parameters with keys host, port (when available), user, password, db.
 */
function fhash($dbid, $reversemap, $hashptr) {
    $backend = $reversemap[$hashptr->lookup($dbid)];

    $endpoint = array();
    if (preg_match('/^(.*):([0-9]*)$/', $backend['host'], $host_port)) {
        $endpoint = array('host' => $host_port[1], 'port' => $host_port[2]);
    }

    return array_merge($endpoint, array(
        'user' => $backend['user'],
        'password' => $backend['password'],
        'db' => $backend['name']
    ));
}
/**
 * Format connection parameters as command-line options for the mysql tools.
 *
 * The password option is wrapped in single quotes for the shell. Single quotes
 * inside the password are escaped ('\''), so a password containing one no
 * longer breaks the generated command line. Passwords without single quotes
 * produce exactly the same output as before.
 *
 * @param array $attrs Parameters with keys host, port, user, password
 *                     (taken by reference, not modified).
 * @return string Options of the form "-h<host> -P<port> -u<user> '-p<password>'".
 */
function mysqlopts(&$attrs) {
    $quoted_password = "'-p" . str_replace("'", "'\\''", $attrs['password']) . "'";
    return ("-h" . $attrs['host'] . " -P" . $attrs['port'] . " -u" . $attrs['user']
        . " " . $quoted_password);
}
/**
 * Format connection parameters as a mysql:// URL (without the password).
 *
 * @param array $attrs Parameters with keys user, host, port, db
 *                     (taken by reference, not modified).
 * @return string URL of the form mysql://<user>@<host>:<port>/<db>.
 */
function mysqlurl(&$attrs) {
    $authority = $attrs['user'] . "@" . $attrs['host'] . ":" . $attrs['port'];
    return "mysql://" . $authority . "/" . $attrs['db'];
}
/**
 * Fetch id, domain and path of every blog that is neither deleted nor
 * archived, ordered by domain and path.
 *
 * @return mixed Whatever $wpdb->get_results() returns for the query.
 */
function get_all_blogs()
{
    global $wpdb;

    $blogs_table = $wpdb->blogs;
    return $wpdb->get_results(
        "SELECT blog_id, domain, path FROM " . $blogs_table . " WHERE deleted = 0 AND archived = '0' ORDER BY domain, path"
    );
}
/**
 * Print the command-line synopsis.
 *
 * Lists all three options accepted by the getopt() call in this script
 * (--db-only was previously missing) and ends the line with a newline.
 *
 * @return void
 */
function usage() {
    echo "noblogs-new-topology.php [--no-database|--calc-size|--db-only] <OLD_MAP> <NEW_MAP>\n";
}
......@@ -18,11 +18,6 @@ if ( defined('DB_CONFIG_FILE') && file_exists( DB_CONFIG_FILE ) ) {
/** The config file resides in ABSPATH. **/
define( 'DB_CONFIG_FILE', ABSPATH . 'db-config.php' );
} elseif ( file_exists( dirname(ABSPATH) . '/db-config.php' ) && ! file_exists( dirname(ABSPATH) . '/wp-settings.php' ) ) {
/** The config file resides one level above ABSPATH but is not part of another install. **/
define( 'DB_CONFIG_FILE', dirname(ABSPATH) . '/db-config.php' );
} else {
/** Lacking a config file, revert to the standard database class. **/
......@@ -40,12 +35,6 @@ define( 'HYPERDB_LAG_UNKNOWN', 3 );
class hyperdb extends wpdb {
/**
* A/I patch!
* store a FlexiHash() instance here.
*/
var $hash_map;
/**
* The last table that was queried
* @var string
......@@ -783,11 +772,6 @@ class hyperdb extends wpdb {
if ( ! is_resource($this->dbh) )
return false;
// A/I: prevent mysql-proxy to cache the admin section
if (strpos($_SERVER['REQUEST_URI'], '/wp-admin/') !== false) {
$query .= ' /* NO CACHE */';
}
$this->timer_start();
$this->result = mysql_query($query, $this->dbh);
$elapsed = $this->timer_stop();
......@@ -1020,25 +1004,8 @@ class hyperdb extends wpdb {
} // class hyperdb
/**
 * AI custom: append every query to a per-day log file for debugging.
 *
 * The log lives under ABSPATH . '/profiling/' and is named after the current
 * date. If the file cannot be opened, nothing is written.
 *
 * @param mixed   $query     The query text.
 * @param mixed   $time      The query time, written verbatim to the log.
 * @param mixed   $backtrace Passed through untouched.
 * @param hyperdb $obj       Unused by this function.
 * @return array array($query, $time, $backtrace), unchanged.
 */
function ai_log_db_queries($query, $time, $backtrace=null, hyperdb $obj ) {
    $passthrough = array($query, $time, $backtrace);

    $log_path = ABSPATH . '/profiling/noblogs_queries_'. date('Ymd') . '.log';
    $handle = fopen($log_path, 'a');
    if ($handle) {
        $entry = sprintf("##\n#Date: %s\n#Query time: %s\n%s\n", date('r'), $time, $query);
        fwrite($handle, $entry);
        fclose($handle);
    }

    return $passthrough;
}
// Create the hyperdb instance used as $wpdb.
$wpdb = new hyperdb();

// Optional query profiling: when AI_DB_PROFILER is defined and strictly true,
// ask $wpdb to keep its queries and register ai_log_db_queries as the
// save-query callback. The defined() guard keeps an installation that never
// declares the constant from failing on the bare constant reference (an Error
// on PHP 8, a notice/warning on older versions).
if ( defined('AI_DB_PROFILER') && AI_DB_PROFILER === true ) {
    $wpdb->save_queries = true;
    $wpdb->save_query_callback = 'ai_log_db_queries';
}

// Load the database configuration file selected above.
require( DB_CONFIG_FILE );
?>
......@@ -4,53 +4,57 @@
//
// Print a map of blog -> backend assignments, for NGINX.
// Internal IP of the master.
define('NOBLOGS_MASTER', '172.16.1.10');
// Load wordpress api.
define('WP_CACHE',false);
require_once('/opt/noblogs/www/wp-load.php');
// flexihash
require_once(ABSPATH . 'r2db/backends.php');
// Derive the master host from the master database URL. The URL-splitting
// helper is named noblogs_split_db(); the previous call to noblogs_db_split()
// referred to a function that is defined nowhere in this code.
$mdata = noblogs_split_db(NOBLOGS_MASTER_CONFIG);
$noblogs_master = $mdata['host'];
/**
 * Return all blogs that are neither deleted nor archived, ordered by domain.
 *
 * The body used to be present twice, the second copy sitting unreachable after
 * the first return; only one copy is kept.
 *
 * @return mixed Whatever $wpdb->get_results() returns for the query
 *               (blog_id and domain columns).
 */
function get_blogs() {
    global $wpdb;
    $sql = "SELECT blog_id, domain FROM {$wpdb->blogs} WHERE deleted = 0 AND archived = '0' ORDER BY domain ASC";
    return $wpdb->get_results($sql);
}
/**
 * Print a string followed by a newline.
 *
 * The echo statement used to be present twice, so every line of the generated
 * map was emitted two times; it is now printed once.
 *
 * @param string $s Text to print.
 * @return void
 */
function printline($s) {
    echo $s . "\n";
}
/**
 * Translate a backend name into the HTTP endpoint (host:port) that serves it.
 *
 * A name of the form "backend_<id>" maps to 172.16.1.<id>:82. Any other name is
 * logged and mapped to the master on port 82.
 *
 * The body used to be present twice, the second copy unreachable after the
 * first return. The single copy kept here takes the master host from the
 * $noblogs_master global (derived from the master database URL at the top of
 * this script) rather than from the hard-coded NOBLOGS_MASTER constant.
 *
 * @param string $backend Backend name.
 * @return string Endpoint as "<ip>:82".
 */
function backend_to_http_endpoint($backend) {
    global $noblogs_master;

    if (substr($backend, 0, 8) != 'backend_') {
        error_log('diamine, di questo backend non so che farmene: ' . $backend);
        return $noblogs_master . ':82';
    }

    $id = substr($backend, 8);
    return '172.16.1.' . $id . ':82';
}
/**
 * Print the blog -> backend map as an NGINX "map" block.
 *
 * The default entry points at the master. Every blog except blog id 1 (which
 * therefore falls through to the default) gets a line mapping its domain to
 * the HTTP endpoint of the backend chosen by the consistent-hash lookup.
 *
 * The body used to be present twice, so the map was emitted two times, the
 * first time through $wpdb->hash_map. Only the variant that builds its own
 * hash from NOBLOGS_BACKEND_CONFIG is kept.
 *
 * @return void
 */
function generate_map() {
    global $noblogs_master;

    // The value returned here must expose lookup().
    $hash = noblogs_load_backends(NOBLOGS_BACKEND_CONFIG);
    $blogs = get_blogs();

    printline('map $http_host $backend_noblogs {');
    printline(' default http://' . $noblogs_master . ':82;');
    foreach ($blogs as $blog) {
        $blog_id = $blog->blog_id;
        if ($blog_id == 1) {
            continue;
        }
        $backend_id = $hash->lookup($blog_id);
        $backend_http = backend_to_http_endpoint($backend_id);
        printline(' ' . $blog->domain . ' http://' . $backend_http . ';');
    }
    printline('}');
}
// Script entry point: print the map.
generate_map();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment