From 7959cf99249df7a0d18ab6534e826c4db5ad453d Mon Sep 17 00:00:00 2001 From: joe <joe@autistici.org> Date: Thu, 28 Nov 2013 06:54:17 +0100 Subject: [PATCH] Added README --- README.md | 253 ++++++++++++++++++++++++++++++++++++++++++++++ db-config.php | 6 +- wp-content/db.php | 3 - 3 files changed, 254 insertions(+), 8 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000..78ec59ceb --- /dev/null +++ b/README.md @@ -0,0 +1,253 @@ +Noblogs documentation +===================== + +A distributed, scalable version of WordPress, with some pre-bundled +(and audited) plugins, and some privacy-related tweaks. + +This software is intended to be used as a basis for a large +(potentially, very large), geographically distributed, WordPress +multisite installation. + +Although some of our patches can be of general use for a WordPress +user that cares about his privacy, this WordPress version is meant to +be deployed on multiple servers. + + +Architecture +------------ + +Scaling WordPress is usually achieved through some form of database +partitioning using HyperDB. This will not work very well in a +high-latency scenario, such as a geographically distributed cluster +(i.e. commodity hardware on different ISPs), due to the very high +number of (sequential) database requests needed to render a single +WordPress page. + +We are solving the distribution problem in a high-latency cluster as +follows: + +* Each blog is completely local to one server (images, files, + database) and is served locally from that server + +* Global data (the main site, buddypress, users, blog index, etc) is + stored on a separate database, relatively small, that is replicated + to all nodes with a master-slave setup. Using HyperDB, we're able to + read from the local replica and to write to the remote master, just + for this tiny global layer. 
+ +* HTTP requests for a specific blog are forwarded to the right backend + by the reverse HTTP proxy layer which stands in front of the PHP + workers, so that partitioning is completely transparent to the + users. + + +Installation +------------ + +In this installation example, we assume you have at least 3 servers; +all of them with a fully-functional LAMP stack already installed. + +### Get the software + +Create a working directory for your noblogs-wp installation on all +your servers, and then clone the noblogs-wp repository: + +```bash + + $ mkdir -p /opt/noblogs/www + $ git clone https://git.autistici.org/ai/noblogs-wp.git +``` + +The noblogs-wp repository is split into an "upstream" branch, where we +keep all the upstream code, the "noblogs" branch, where we collect our +patches on top of the upstream branch, and a few "release" branches, +used in production. + +So, if you want to use a stable release branch, check it out, e.g.: + +```bash + + $ git checkout noblogs-1.2.7 +``` + +### Setting up MySQL databases + +This is probably the trickiest part of the installation, so if +you have doubts, please take a good look at the MySQL manuals. + +Each MySQL instance will have two datasets: + +- A global, shared dataset +- A local dataset containing data of the blogs assigned to this server + +Let's create the two datasets on each server (here we're showing +server 1): + +```bash + + $ mysqladmin create global + $ mysqladmin create local_1 + $ mysql -e "GRANT ALL PRIVILEGES ON global.* TO 'myuser'@'localhost' \ + IDENTIFIED BY 'somepassword'" + $ mysql -e "GRANT ALL PRIVILEGES ON local_1.* TO 'myuser'@'localhost' \ + IDENTIFIED BY 'someotherpassword'" +``` + +The name of the local database should change on the other +servers, e.g. ``local_2``, ``local_3`` and so on. The global +dataset should be replicated, and have a master instance.
So, pick one +server to be your master, and configure it to allow replication (in +the following, we assume your master will be server 1): + +```bash + + $ mysql -e "CREATE USER 'noblogs_replica' IDENTIFIED BY \ + 'a_password'" + $ mysql -e "GRANT REPLICATION SLAVE ON *.* TO 'noblogs_replica'" +``` + +On all servers, you should add the following to your MySQL configuration file: + +``` + + binlog-do-db = global +``` + +Now set up the replica on the slaves (in our example, servers 2 and 3): + +```bash + + $ mysql -e "CHANGE MASTER TO \ + MASTER_HOST='server_1', \ + MASTER_PORT=3306, \ + MASTER_USER='noblogs_replica', \ + MASTER_PASSWORD='a_password';" +``` + +Since the databases are empty at this point, we can safely start replication: + +``` + + $ mysql -e 'START SLAVE;' + +``` + +### Configuring wp-config.php + +As with every other WordPress installation, all global configuration +variables are stored in wp-config.php. You can follow the usual +instructions for a WordPress database; just remember to fill in the +database information with the data for the global dataset. + +You'll have a few more settings to fill into this file, as follows: + +```php + // The master, backends and host files (see below) + define("NOBLOGS_BACKEND_CONFIG", "/etc/noblogs/backends"); + define("NOBLOGS_MASTER_CONFIG", "/etc/noblogs/master"); + define("NOBLOGS_HOST_FILE", "/etc/noblogs/ip"); + // Set to true if you want to activate query profiling + define('AI_DB_PROFILER', false); + define('AI_LOG_HYPERDB',false); + // Number of replicas for consistent hashing. + define('R2DB_FLEXIHASH_REPLICAS', 128); + // Recaptcha keys should be global for all blogs + define('GLOBAL_RECAPTCHA_KEY', 'value_you_got'); + define('GLOBAL_RECAPTCHA_PRIVATE_KEY', 'value_of_private_key_you_got'); +``` + +### Setting up the master and backend roles + +A few file paths are currently hard-coded in our code (hopefully this +will change soon...).
In order for our HyperDB configuration to work, +you need to create 3 files under /etc/noblogs. You previously defined +three file paths in `wp-config.php`: + +* `NOBLOGS_MASTER_CONFIG` +* `NOBLOGS_BACKEND_CONFIG` +* `NOBLOGS_HOST_FILE` + +The `NOBLOGS_MASTER_CONFIG` file should contain: +```bash + mysql://someuser:somepassword@master-ip-address:3306/global +``` + +So, just the DSN of the master database. Beware that it is +**critical** that you use the IP address of the master here, and the +hostname later. This is due to a HyperDB limitation: it does not allow +using the same host/port for two separate targets. + +You will need to populate the `NOBLOGS_BACKEND_CONFIG` file next. Fill it +with the connection parameters for all the backends: + +```bash + 1 backend1 mysql://someuser:someotherpassword@backend_1-hostname:3306/local_1 + 2 backend2 mysql://someuser:someotherpassword@backend_2-hostname:3306/local_2 + 3 backend3 mysql://someuser:someotherpassword@backend_3-hostname:3306/local_3 +``` + +So, the format is `<ID> <HOSTNAME> <DSN>`, where the DSN is that of +the *local* dataset. + +Finally, `NOBLOGS_HOST_FILE`: this file should contain only the IP +address of the server you're on. This is used on each server in +order to automatically detect whether it is the master or not. + +### Create directories + +This command creates the required directory hierarchy for noblogs-wp. +```bash + # set ROOT to the root of your noblogs installation + # e.g. ROOT="/var/www" + for subdir in blogs.dir cache uploads ; do + test -d ${ROOT}/wp-content/${subdir} \ + || mkdir -p ${ROOT}/wp-content/${subdir} + chmod 02770 ${ROOT}/wp-content/${subdir} + done +``` + +### Create the master db schema + +Just visit the install page *on the master host*, e.g.: + +http://example.com/blog/wp-admin/install.php + +This will also initialize the schema on the MySQL master, propagating +it to the slaves.
+ +### Create the global traffic proxy + +You need a global proxy that can redirect the HTTP request to the +correct backend, using the same partitioning function that WordPress +uses in HyperDB. + +We use NGINX as frontend/load balancer. We have a cron job that +creates the redirection map based on the partition function used by +HyperDB. This cron job fetches `wp-nginx-map.php` from the +repository. You should tweak this script for your needs, as it has +only been tested with noblogs.org. + +### Problems? + +We expect that! Please let us know and we'll assist you and also fix +the installation instructions! + + +Additional management tools +--------------------------- + +We will very soon release our additional management tools for noblogs: +`noblogs-cli` (a set of tools to ease the day-to-day operations) and +`noblogsmv`, a state machine that makes it easy (and +fast, as you can use as many workers as possible in parallel) to rebalance the +cluster if you add or remove a backend, moving databases and blogs +around. + + +Contributing to the project +--------------------------- + +We're not ready to accept external contributors on our git platform at +the moment (but this will change). For now, just prepare a patch and +let us know at info at autistici dot org. You can find us hanging +around in the #ai channel on irc.autistici.org.
diff --git a/db-config.php b/db-config.php index f1b35d24e..365abb2b8 100644 --- a/db-config.php +++ b/db-config.php @@ -4,15 +4,11 @@ $wpdb->persistent = true; $wpdb->max_connections = 30; -define("NOBLOGS_BACKEND_CONFIG", "/etc/noblogs/backends"); -define("NOBLOGS_MASTER_CONFIG", "/etc/noblogs/master"); -define("NOBLOGS_HOST_FILE", "/etc/noblogs/ip_ring0"); - include_once('r2db/db-hash.php'); include_once('r2db/db-backends.php'); // Set up global dataset with master databases -noblogs_load_global_dataset(NOBLOGS_MASTER_CONFIG, NOBLOGS_HOST_FILE); +noblogs_load_global_dataset(NOBLOGS_MASTER_CONFIG, NOBLOGS_HOST_FILE); // Add all the sharded blog databases. $wpdb_reverse_backend_map = noblogs_load_backends(NOBLOGS_BACKEND_CONFIG, $wpdb->hash_map); diff --git a/wp-content/db.php b/wp-content/db.php index 2bd49b416..8829d102f 100644 --- a/wp-content/db.php +++ b/wp-content/db.php @@ -1,7 +1,4 @@ <?php -//AI patch: set to true if you want to activate query profiling -define('AI_DB_PROFILER', false); - /* Plugin Name: HyperDB Plugin URI: https://wordpress.org/plugins/hyperdb/ -- GitLab