From 8d4815f33df646b496284802c44dbdbac7eedbe2 Mon Sep 17 00:00:00 2001 From: gravel Date: Thu, 13 Apr 2023 20:32:37 +0200 Subject: [PATCH] Docs, pruning & whitespace --- .phpenv | 2 +- CONTRIBUTING.md | 14 +- README.md | 25 +-- output/js/constants.js | 2 +- output/main.js | 2 +- output/styles2.css | 16 +- php/fetch-servers.php | 72 ++++---- php/generate-html.php | 12 +- php/getenv.php | 6 +- php/utils/logging.php | 57 +++++- php/utils/servers-rooms.php | 242 +++++++++++++++++++------- php/utils/utils.php | 107 +++++++++--- sites/+components/page-head.php | 4 +- sites/+components/tbl_communities.php | 4 +- sites/+instructions/English.txt | 2 +- sites/+instructions/Russian.txt | 2 +- sites/index.php | 12 +- 17 files changed, 414 insertions(+), 167 deletions(-) diff --git a/.phpenv b/.phpenv index 960b551f..8040b40c 100644 --- a/.phpenv +++ b/.phpenv @@ -6,7 +6,7 @@ $DOCUMENT_ROOT="$PROJECT_ROOT/output"; $TEMPLATES_ROOT="$PROJECT_ROOT/sites"; $LANGUAGES_ROOT="$PROJECT_ROOT/languages"; - + include_once "$PROJECT_ROOT/php/utils/logging.php"; // set timeout for file_get_contents() diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1cb5dc17..bc836be5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,7 @@ ### Prerequisites -- PHP (version TBD) +- PHP 8.1+ - `make` - `entr` to watch for file changes - `xdg-open` link handler to invoke browser @@ -44,22 +44,22 @@ See [`Makefile`](Makefile) for more details. 
**Identifier casing**: `snake_case` and `CONSTANT_CASE` -**Comments and documentation**: TBD +**Comments and documentation**: [PHPDoc](https://en.wikipedia.org/wiki/PHPDoc) ### HTML & CSS -**Identifier casing**: `kebab-case`, occasional `snake_case` +**Identifier casing**: `kebab-case`, legacy `snake_case` -**Comments and documentation**: TBD +**Comments and documentation**: PHP no-ops instead of HTML comments, if possible ### JavaScript -**Identifier casing**: `camelCase` and `CONSTANT_CASE`, occasional `snake_case` +**Identifier casing**: `camelCase` and `CONSTANT_CASE`, occasional `snake_case` for references to DOM **Comments and documentation**: [JSDoc](https://jsdoc.app/) ## Contact - Web Development Session Community on [caliban.org](https://sog.caliban.org/) -- Project lead, querying logic, deployment, community filtering: `someguy` on Session -- Documentation, code quality, HTML generation, CSS, JS: `gravel` on Session +- Project lead, deployment, community filtering: `someguy` on Session +- Documentation, code quality, querying logic, HTML generation, CSS, JS: `gravel` on Session diff --git a/README.md b/README.md index 0cab54ff..4dd696de 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Currently this script crawls the following sites: - - -Additionally, a few other servers are hardcoded, see [querying logic](php/fetch-servers.php). +Additionally, [a few other servers are hardcoded](php/servers/known-servers.php). ### How does this work? @@ -34,15 +34,14 @@ The [`update-listing.php`](php/update-listing.php) script invokes the following The querying logic consists of these steps: -1. Fetching source HTML: `get_html_from_known_sources()` -1. Extracting Session invites from the HTML: -`extract_join_links_from_html()` and `get_servers_from_join_links()` -1. Making sure servers are online: `reduce_servers()` -1. Querying the servers for all available rooms -and normalizing active user numbers: `query_servers_for_rooms()` -1. 
De-duplicating servers based on public keys: -`get_pubkeys_of_servers()` and `reduce_addresses_of_pubkeys()` -1. Aggregating all server info & adding language data: `generate_info_arrays()` +1. Fetching source HTML: `query_known_sources()` +1. Extracting Session join URLs from the HTML: `parse_join_links()` +1. Building server instances from join URLs: `CommunityServer::from_join_urls()` +1. Adding known servers to list: `CommunityServer::from_known_hosts()` +1. Merging servers based on URL: `CommunityServer::dedupe_by_url()` +1. Making sure servers are online and querying rooms & pubkeys: +`CommunityServer::poll_reachable()` +1. Merging servers based on public keys: `CommunityServer::dedupe_by_pubkey()` Static HTML is generated from the [`sites`](sites) directory to the [`output`](output) directory, which additionally contains static assets. All contents of `sites` are invoked to produce a HTML page unless they are prefixed with a `+` sign. @@ -56,7 +55,7 @@ This happens randomly. To make sure this won't affect the results, we simply check whether the server is online (the initial connection being successful), and then retry a lot of times with a short timeout until we eventually get the content. -The details can be seen in `curl_get_contents()`. +The details can be seen in [`curl_get_contents()`](php/utils/utils.php). ### Official repositories @@ -68,6 +67,10 @@ you can issue a pull request here: - +### Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md). 
+ ## Contact If you want to contact me, you can add me on Session via my diff --git a/output/js/constants.js b/output/js/constants.js index 2db0fc65..9985128a 100644 --- a/output/js/constants.js +++ b/output/js/constants.js @@ -5,7 +5,7 @@ export const dom = { /** @return {HTMLTableElement | null} */ tbl_communities: () => document.getElementById("tbl_communities"), - tbl_communities_content_rows: + tbl_communities_content_rows: () => Array.from(dom.tbl_communities()?.rows)?.filter(row => !row.querySelector('th')), meta_timestamp: () => document.querySelector('meta[name=timestamp]'), last_checked: () => document.getElementById("last_checked_value"), diff --git a/output/main.js b/output/main.js index 29fc35b8..467ff3ab 100644 --- a/output/main.js +++ b/output/main.js @@ -270,7 +270,7 @@ function setSortState(table, { ascending, column }) { } table.setAttribute(ATTRIBUTES.SORTING.ASCENDING, ascending); table.setAttribute(ATTRIBUTES.SORTING.COLUMN, column); - + // No way around this for brief CSS. const headers = table.querySelectorAll("th"); headers.forEach((th, colno) => { diff --git a/output/styles2.css b/output/styles2.css index 2aded118..84786a33 100644 --- a/output/styles2.css +++ b/output/styles2.css @@ -1,6 +1,6 @@ :root { --alternate-row-color: #e8e8e8; - --body-margin: 8px; /* Default value in browsers */ + --body-margin: 8px; /* Default value in browsers */ --max-font-size-unitless: 18; /* Measurements of the width of all columns @@ -19,19 +19,19 @@ /* Space left for the name and description columns, in the collapsed and expanded cases. */ - --collapsed-dynamic-columns-width: + --collapsed-dynamic-columns-width: calc( - 100vw + 100vw - var(--collapsed-static-column-width) - 2 * var(--body-margin); ); - - --expanded-dynamic-columns-width: + + --expanded-dynamic-columns-width: calc( - 100vw + 100vw - var(--expanded-static-column-width) ); - + /* Default for wide screens. 
*/ --dynamic-columns-width: var(--expanded-dynamic-columns-width); } @@ -80,7 +80,7 @@ header { #tbl_communities { margin: 0 auto; - + --cell-padding-h: 0.5em; --cell-padding-v: 0.5em; --cell-padding: var(--cell-padding-h) var(--cell-padding-v); diff --git a/php/fetch-servers.php b/php/fetch-servers.php index e867c366..b2d31ef7 100644 --- a/php/fetch-servers.php +++ b/php/fetch-servers.php @@ -4,73 +4,78 @@ require_once 'getenv.php'; require_once 'utils/utils.php'; require_once 'servers/known-servers.php'; - include_once "$LANGUAGES_ROOT/language_flags.php"; // actually runs fine without it require_once 'utils/servers-rooms.php'; + // Not required + include_once "$LANGUAGES_ROOT/language_flags.php"; + + /** + * Fetch online Communities and write the resulting data to disk. + * Communities are fetched as follows: + * + * 1. Get join links from our sources + * 2. Parse join links into servers + * 3. Add hardcoded servers + * 4. De-dupe servers based on base URL + * 5. Fetch server rooms and pubkey + * 6. De-dupe servers based on pubkey + */ function main() { global $LOGGING_VERBOSITY; - // Get join links -> Add known servers -> - // De-dupe based on base URL -> - // Test domains -> De-dupe based on pubkey + // Read the -v|--verbose option increasing logging verbosity to debug. $options = getopt("v", ["verbose"]); if (isset($options["v"]) or isset($options["verbose"])) { $LOGGING_VERBOSITY = LoggingVerbosity::Debug; } - + global $CACHE_ROOT, $ROOMS_FILE, $KNOWN_SERVERS, $KNOWN_PUBKEYS; + // Create default directories with conservative permissions. file_exists($CACHE_ROOT) or mkdir($CACHE_ROOT, 0700); + // Query our sources and store the resulting HTML. // TODO: Tag information is currently discarded. Feature needs triage. $html_pages = query_known_sources(); - // Find join links in each HTML document and concatenate the results. + // Find join links in each HTML document and merge the resulting arrays. 
$join_links = array_merge([], ...array_map('parse_join_links', $html_pages)); + /** * @var CommunityServer[] $servers */ $servers = CommunityServer::from_join_urls($join_links); + // Add known hosts. $servers = [...CommunityServer::from_known_hosts($KNOWN_SERVERS, $KNOWN_PUBKEYS), ...$servers]; + + // Merge servers with the same URL. $servers = CommunityServer::dedupe_by_url($servers); + + // Fetch server data and filter unreachable servers. $servers = CommunityServer::poll_reachable($servers); + + // Merge servers with the same public key. $servers = CommunityServer::dedupe_by_pubkey($servers); + + // Count servers and rooms. $servers_total = count($servers); $rooms_total = count_rooms($servers); - - // Output query results to file. + log_info("Done fetching communities."); log_info( - "Found $rooms_total unique Session Communities " . + "Found $rooms_total unique Session Communities " . "on $servers_total servers." . PHP_EOL ); - file_put_contents($ROOMS_FILE, json_encode($servers)); - } - /** - * Iteratively crawls an index for individual Session Community details. - * @return string[] - */ - function crawl_source_index($html, $url_base, $item_url_pattern) { - preg_match_all($item_url_pattern, $html, $match_result); - $matched_links = $match_result[0]; - - foreach ($matched_links as $link) { - $link = $url_base . $link; - log_debug("Requesting $link"); - $pages[] = file_get_contents($link); - log_debug($http_response_header[0]); - // Supposed to be "HTTP/1.1 200 OK" - } - - return $pages; + // Output fetching results to file. + file_put_contents($ROOMS_FILE, json_encode($servers)); } /** - * Fetches known sources of Session - * Community join links. + * Fetches known sources of Session Community join links. + * @return string[] HTML pages containing join URLs. 
*/ - function query_known_sources() { + function query_known_sources(): array { global $SOURCES; log_info("Requesting Awesome Session Group list..."); @@ -96,8 +101,9 @@ return [...$pages_asgl, ...$pages_loki, ...$pages_sdir, ...$pages_fark]; } - /* + /** * Debug function to see which communities use pinned messages already + * @deprecated */ function print_pinned_messages($room_assignments_arr) { // for each server a.k.a. public key do @@ -126,7 +132,7 @@ } } - // run main function + // Fetch servers main(); ?> diff --git a/php/generate-html.php b/php/generate-html.php index 9e686d99..318ed6af 100644 --- a/php/generate-html.php +++ b/php/generate-html.php @@ -1,11 +1,11 @@ \ No newline at end of file diff --git a/php/utils/logging.php b/php/utils/logging.php index 82309d10..3d340aa6 100644 --- a/php/utils/logging.php +++ b/php/utils/logging.php @@ -1,10 +1,14 @@ "\033[31m", @@ -47,7 +59,7 @@ } /** - * Calculate process runtime as [s, ns]. + * Calculate current process runtime as [s, ns]. * @return int[] Seconds and nanoseconds. */ function hrtime_interval() { @@ -60,9 +72,10 @@ } /** - * Format process runtime to milisecond precision. + * Format current process runtime to milisecond precision. + * @return string Runtime ninutes, seconds, and miliseconds as string. */ - function runtime_str() { + function runtime_str(): string { list($s, $ns) = hrtime_interval(); return ( date('i:s.', $s) . @@ -81,25 +94,55 @@ fwrite(STDERR, $color_marker . "[$runtime] [$marker] $msg$color_reset" . PHP_EOL); } + /** + * Logs the given message as an error to stderr. + * Only logs when `$LOGGING_VERBOSITY` is Error and below. + * @param string $msg String message to log. + */ function log_error(string $msg) { _log_message($msg, LoggingVerbosity::Error); } + /** + * Logs the given message as a warning to stderr. + * Only logs when `$LOGGING_VERBOSITY` is Warning and below. + * @param string $msg String message to log. 
+ */ function log_warning(string $msg) { _log_message($msg, LoggingVerbosity::Warning); } + /** + * Logs the given message as an info message to stderr. + * Only logs when `$LOGGING_VERBOSITY` is Info and below. + * @param string $msg String message to log. + */ function log_info(string $msg) { _log_message($msg, LoggingVerbosity::Info); } + /** + * Logs the given message as a debug message to stderr. + * Only logs when `$LOGGING_VERBOSITY` is Debug and below. + * @param string $msg String message to log. + */ function log_debug(string $msg) { _log_message($msg, LoggingVerbosity::Debug); } + /** + * Logs the given value in a debug message to stderr. + * Only logs when `$LOGGING_VERBOSITY` is debug and below. + * @param string $msg String message to log. + */ function log_value(mixed $value) { log_debug(var_export($value, true)); } + /** + * @var $LOGGING_VERBOSITY + * Global setting. + * Controls how detailed the displayed logs are. + */ $LOGGING_VERBOSITY = LoggingVerbosity::Info; ?> diff --git a/php/utils/servers-rooms.php b/php/utils/servers-rooms.php index 20e7cde1..57f86ae7 100644 --- a/php/utils/servers-rooms.php +++ b/php/utils/servers-rooms.php @@ -2,30 +2,75 @@ include_once "$PROJECT_ROOT/languages/language_flags.php"; + /** + * Representation of Session Community room. + */ class CommunityRoom implements JsonSerializable { - /** @var CommunityServer $server */ + /** @var CommunityServer $server Server this room belongs to. */ public readonly object $server; + /** @var ?int $active_users Number of active users in the defined period. */ public readonly ?int $active_users; + /** @var ?int $active_users_cutoff Period for `$active_users`, in seconds. */ public readonly ?int $active_users_cutoff; + /** @var string $token Room name in Community API. */ public readonly string $token; + /** @var ?string $name User-facing name given to Community. */ public readonly ?string $name; + /** @var ?string[] $admins The mixed Session IDs of public room admins. 
*/ public readonly ?array $admins; + /** @var ?string[] $moderators The mixed Session IDs of public room moderators. */ public readonly ?array $moderators; + /** @var ?float $created UNIX timestamp of room creation, in seconds. */ public readonly ?float $created; + /** @var ?string $description User-facing description given to Community. */ public readonly ?string $description; + /** @var ?int $image_id Optional file ID for this room's icon, as served under the room files. */ public readonly ?int $image_id; + /** @var ?int $info_updates Monotonic integer counter that increases + * whenever the room's metadata changes. */ public readonly ?int $info_updates; + /** @var ?int $message_sequence Monotonic room post counter that + * increases each time a message is posted, edited, or deleted in this room. */ public readonly ?int $message_sequence; + /** + * @var ?bool $read + * This boolean flag indicates whether a regular user + * has permission to read messages in this room. + */ public readonly ?bool $read; + /** + * @var ?bool $upload + * This boolean flag indicates whether a regular user + * has permission to upload files to this room. + */ public readonly ?bool $upload; + /** + * @var ?bool $write + * This boolean flag indicates whether a regular user + * has permission to write messages to this room. + */ public readonly ?bool $write; - /** @var string[] $tags */ - public readonly array $tags; // Custom properties - public readonly string $language_flag; - private function __construct($server, array $details) { + /** + * @var string $language_flag + * Optional Unicode emoji of region matching + * the primary language of this room. + * + * Custom attribute. + */ + public readonly ?string $language_flag; + + /** + * @var string[] $tags + * String tags applied to room by creator or submitter. + * + * Custom attribute. 
+ */ + public readonly array $tags; + + private function __construct(\CommunityServer $server, array $details) { global $languages; $this->server = $server; @@ -51,14 +96,14 @@ ? $details['tags'] : []; - $this->language_flag = + $this->language_flag = isset($languages[$room_identifier]) ? $languages[$room_identifier] : ""; } /** - * Return all room data to be serialized to JSON + * Return information for JSON serialization. */ function jsonSerialize(): array { $details = get_object_vars($this); @@ -67,7 +112,7 @@ } /** - * Create a CommunityRoom instance from data. + * Create a CommunityRoom instance from loaded data. * @param CommunityServer $server */ public static function from_details($server, array $details) { @@ -75,7 +120,7 @@ } /** - * Create an array of CommunityRoom instances from data. + * Create an array of CommunityRoom instances from loaded data. * @param array[] $details * @return CommunityRoom[] */ @@ -102,29 +147,41 @@ return time() - $this->created; } + /** + * Return the browser preview URL for this room. + */ function get_preview_url(): string { $base_url = $this->server->base_url; $token = $this->token; return "$base_url/r/$token"; } + /** + * Return the QR code invite URL for this room. + */ function get_invite_url(): string { $base_url = $this->server->base_url; $token = $this->token; return "$base_url/r/$token/invite.png"; } + /** + * Return the in-app join URL for this room. + */ function get_join_url(): string { $base_url = $this->server->base_url; $pubkey = $this->server->pubkey; $token = $this->token; return "$base_url/$token?public_key=$pubkey"; } - + + /** + * Return the URL of this room's designated icon. + */ function get_icon_url(): string | bool { $image_id = $this->image_id; - if ($image_id == null) + if ($image_id == null) return false; $base_url = $this->server->base_url; @@ -134,8 +191,8 @@ } /** - * Returns our format of room identifier, - * i.e. token+pubkey[:4] + * Return our format of room identifier. 
+ * @return string String in the form `token+pubkey[:4]`. */ function get_room_identifier(): string { $token = $this->token; @@ -143,28 +200,33 @@ return "$token+$pubkey_4"; } } - - $SERVER_STRINGIFY_MODE = 0; - + + /** + * Class representing Session Community server hosting Community rooms. + */ class CommunityServer implements JsonSerializable { - // public static int $STRINGIFY_MODE = 0; - + /** @var string $base_url The root URL of this server. */ public string $base_url = ""; + /** @var string $pubkey The SOGS protocol pubkey of this server. */ public string $pubkey = ""; + /** @var ?\CommunityRoom[] Array of rooms hosted by this server. */ public ?array $rooms = null; - /** - * @var string[] $room_hints + /** + * @var string[] $room_hints * This array contains fallback room tokens collected from links. * Used only if fetching rooms list fails. */ private array $room_hints = []; - + private function __construct() {} /** - * Compare two CommunityServer instances by URL. - * @param CommunityServer $a - * @param CommunityServer $b + * Compare two CommunityServer instances by base URL. + * @param CommunityServer $a First server to compare URLs. + * @param CommunityServer $b Second server to compare URLs. + * @return int A number less than, equal to, or greater than zero + * when the servers are in correct order, interchangable, or in reverse order, + * respectively. */ static function compare_by_url($a, $b): int { return strcmp( @@ -174,7 +236,7 @@ } /** - * Sort an array of servers in-place based on URL. + * Sort an array of servers in place based on URL. * @param CommunityServer[] &$servers */ static function sort_by_url(array &$servers) { @@ -183,16 +245,18 @@ /** * Compare two CommunityServer instances by public key. - * @param CommunityServer $a - * @param CommunityServer $b + * @param CommunityServer $a First server to compare public keys. + * @param CommunityServer $b Second server to compare public keys. 
+ * @return int A number less than, equal to, or greater than zero + * when the servers are in correct order, interchangable, or in reverse order, + * respectively. */ static function compare_by_pubkey($a, $b): int { return strcmp($a->pubkey, $b->pubkey); } - /** - * Sorts an array of servers in-place by public key. + * Sorts an array of servers in place by public key. * @param CommunityServer[] $servers */ public static function sort_by_pubkey(&$servers) { @@ -200,7 +264,7 @@ } /** - * Absorbs extra info from another instance describing the same server. + * Absorbs extra info from another instance of the same server. * @param CommunityServer $server */ private function merge_from($server) { @@ -235,9 +299,9 @@ } /** - * Merges consecutive servers on equality of given attribute. - * @param CommunityServer[] $servers Servers sorted by attribute- - * @param string $key Method call whose result to merge servers by. + * Merges consecutive servers in array in place on equality of given attribute. + * @param CommunityServer[] $servers Servers sorted by given attribute. + * @param string $method Method name to retrieve attribute from server. */ private static function merge_by(&$servers, string $method) { // Backwards-merging to preserve indexing for unprocessed servers. @@ -251,6 +315,9 @@ } } + /** + * Write details about this server to debug log. + */ private function log_details() { $base_url = $this->base_url; $count_rooms = count($this->rooms ?? []); @@ -261,8 +328,8 @@ /** * Filters the given servers to remove URL duplicates. - * @param CommunityServer[] $servers - * @return CommunityServer[] + * @param CommunityServer[] $servers Servers to merge by URL. + * @return CommunityServer[] Merged URL-unique servers. */ public static function dedupe_by_url($servers) { CommunityServer::sort_by_url($servers); @@ -276,34 +343,41 @@ /** * Filters the given servers to remove pubkey duplicates. 
- * @param CommunityServer[] $servers - * @return CommunityServer[] + * @param CommunityServer[] $servers Servers to merge by public key. + * @return CommunityServer[] Merged pubkey-unique servers. */ public static function dedupe_by_pubkey($servers) { CommunityServer::sort_by_pubkey($servers); CommunityServer::merge_by($servers, "get_pubkey"); - + foreach ($servers as $server) $server->merge_consistency(); return $servers; } + /** + * Return information for JSON serialization. + */ function jsonSerialize(): array { return get_object_vars($this); } /** - * @return CommunityServer[] + * Create server instances located on hardcoded hosts. + * @param string[] $hosts Array of base URLs for known servers. + * @param string[] $pubkeys + * Associative array from hostnames to SOGS public keys. + * @return CommunityServer[] Array of resulting Community servers. */ - static function from_known_hosts($hosts, $pubkeys) { + static function from_known_hosts(array $hosts, array $pubkeys) { $servers = []; foreach ($hosts as $base_url) { $server = new CommunityServer(); $server->base_url = $base_url; - + $hostname = url_get_base($base_url, false); $server->pubkey = $pubkeys[$hostname]; @@ -315,7 +389,10 @@ } /** - * @return CommunityServer[] + * Create server instances from given room join URLs. + * Resulting servers will know of the embedded room tokens. + * @param string[] $join_urls Join URLs found in the wild. + * @return CommunityServer[] Array of resulting Community servers. */ static function from_join_urls(array $join_urls) { $servers = []; @@ -332,8 +409,9 @@ } /** - * @param array $details - * @return CommunityServer + * Create Community server instance from loaded server data. + * @param array $details Decoded JSON associative data about server. + * @return CommunityServer Server represented by given data. 
*/ static function from_details(array $details) { $server = new CommunityServer(); @@ -346,8 +424,9 @@ } /** - * @param array[] $details - * @return CommunityServer[] + Create Community server instance from array loaded server data. + * @param array $details Decoded JSON associative arrays about server. + * @return CommunityServer[] Servers represented by given data. */ static function from_details_array(array $details_array) { $servers = []; @@ -360,8 +439,10 @@ } /** - * @param CommunityServer[] $servers + * Collect the rooms among the given Community servers. + * @param CommunityServer[] $servers Array of Community servers. * @return CommunityRoom[] + * Array of all rooms contained in the given servers. */ static function enumerate_rooms($servers) { $rooms = []; @@ -372,15 +453,17 @@ } /** - * Polls all servers for rooms. - * @param CommunityServer[] $servers Servers to poll. - * @return CommunityServer[] Reachable servers. + * Polls given servers for rooms and public key and saves this info. + * Servers will be disqualified if no rooms can be found, + * and/or if no public key is obtained or hardcoded. + * @param CommunityServer[] $servers Servers to fetch. + * @return CommunityServer[] Servers polled successfully. */ public static function poll_reachable(array $servers): array { $reachable_servers = []; // Synchronous for-loop for now. - foreach ($servers as $server) { + foreach ($servers as $server) { if (!($server->fetch_rooms())) continue; // Accept failures to fetch pubkey if already known. if (!$server->fetch_or_has_pubkey()) continue; @@ -390,25 +473,44 @@ return $reachable_servers; } + /** + * Returns the URL scheme of this server. + * @return string "http" or "https". + */ function get_scheme() { return parse_url($this->base_url, PHP_URL_SCHEME); } + /** + * Reduces this server's base URL to HTTP. + */ function downgrade_scheme() { $base_url = $this->base_url; $this->base_url = "http://" . 
$this->get_hostname(); log_info("Downgrading $base_url to HTTP."); } - + /** + * Returns the hostname for this server. + * @return string URL with hostname and port, if applicable. + * Scheme not included. + */ function get_hostname() { return url_get_base($this->base_url, include_scheme: false); } + /** + * Returns the server's root URL. + * @return string URL with scheme, hostname, and port, if applicable. + */ function get_base_url() { return $this->base_url; } + /** + * Returns the server's public key. + * @return string SOGS pubkey as used in the Session protocol. + */ function get_pubkey() { return $this->pubkey; } @@ -437,11 +539,20 @@ $this->room_hints[] = url_get_token($join_url); } - function has_pubkey() { + /** + * Checks whether the current server SOGS public key is initialized. + * @return bool False if the public key is empty, true otherwise. + */ + function has_pubkey(): bool { return $this->pubkey != ""; } - private function fetch_room_list() { + /** + * Attempts to fetch the current server's room listing. + * Downgrades the server's scheme to HTTP if necessary. + * @return array|false Associative data about rooms if successful. + */ + private function fetch_room_list(): array|bool { $base_url = $this->base_url; list($rooms, $downgrade) = curl_get_contents_downgrade("$base_url/rooms?all=1"); if (!$rooms) { @@ -458,7 +569,12 @@ return $room_data; } - private function fetch_room_hints() { + /** + * Attempts to fetch the current server's rooms using observed room names. + * Downgrades the server's scheme to HTTP if necessary. + * @return ?array Associative data about rooms if successful. + */ + private function fetch_room_hints(): ?array { $base_url = $this->base_url; $rooms = []; @@ -494,11 +610,11 @@ } /** - * Attempt to fetch rooms for self using SOGS API. - * + * Attempt to fetch rooms for tbe current server using SOGS API. + * * @return bool True if successful, false otherwise. 
*/ - function fetch_rooms() { + function fetch_rooms(): bool { $this->log_details(); $base_url = $this->base_url; @@ -508,7 +624,7 @@ log_value($this->room_hints); if (!url_is_reachable($base_url)) { log_warning("Reachability test failed by $base_url."); - return []; + return false; } } @@ -524,7 +640,11 @@ return true; } - function fetch_or_has_pubkey() { + /** + * Fetch the server's SOGS public key if absent. + * @return bool True if pubkey is present or has been fetched, false otherwise. + */ + function fetch_or_has_pubkey(): bool { $base_url = $this->base_url; // Do not use 'or' here; I learned the hard way. $result = $this->fetch_pubkey() || $this->has_pubkey(); @@ -534,7 +654,7 @@ /** * Attempt to fetch own public key by parsing SOGS HTML preview. - * + * * @return bool True if successful, false otherwise. */ function fetch_pubkey() { diff --git a/php/utils/utils.php b/php/utils/utils.php index e39fb77a..5d0c5e99 100644 --- a/php/utils/utils.php +++ b/php/utils/utils.php @@ -10,46 +10,84 @@ })(); /** - * @param CommunityServer[] $servers + * Counts the total rooms across the given Community servers. + * @param \CommunityServer[] $servers Community Servers to count. + * @return int Total number of Community rooms. */ - function count_rooms($servers) { + function count_rooms(array $servers): int { $rooms_total = 0; foreach ($servers as $server) { $rooms_total += count($server->rooms); } return $rooms_total; } - - function truncate($url, $len) { - return (strlen($url) > $len + 3) - ? substr($url, 0, $len).'...' - : $url; + + /** + * Truncates a string to the given length. + * @param string $str String to truncate. + * @param int $len Target ellipsised length, excluding ellipsis. + * @return string String of given length plus ellipsis, + * or original string if not longer. + */ + function truncate(string $str, int $len) { + return (strlen($str) > $len + 3) + ? substr($str, 0, $len).'...' 
+ : $str; } - - /* - * Helper function for reduce_servers + + /** + * Check whether URL is reachable, downgrading to HTTP if needed. + * @param string $url URL to check. + * @param int $retries [optional] Number of connection attempts. + * @return bool Whether or not the server responded with a non-5XX HTTP code. */ - function url_is_reachable($url, $retries = 4) { + function url_is_reachable(string $url, int $retries = 4): bool { $retcode = curl_get_response_downgrade( - $url, retries: $retries, + $url, retries: $retries, curlopts: [CURLOPT_NOBODY => true], stop_on_codes: [404] )[0]; return $retcode != 0 && floor($retcode / 100) != 5 ; } - function curl_get_contents_downgrade(string $url, $retries = 4, $stop_on_codes = [404]) { + /** + * Fetch URL repeatedly to obtain contents, downgrading to HTTP if needed. + * @param string $url URL to fetch. + * @param int $retries [optional] Number of connection attempts. + * @param int[] $stop_on_codes [optional] + * If one of these HTTP codes is encountered, fetching stops early. + * @return array Fetched contents (if applicable), + * and whether a downgrade to HTTP took place. + * A code of 0 corresponds to an unreachable host. + */ + function curl_get_contents_downgrade(string $url, int $retries = 4, $stop_on_codes = [404]) { list($retcode, $content, $downgrade) = curl_get_response_downgrade($url, $retries, $stop_on_codes); return [$retcode == 200 ? $content : null, $downgrade]; } /** - * file_get_contents alternative that circumvents flaky routing to Chinese servers + * Fetch URL repeatedly to obtain URL contents. + * @param string $url URL to fetch. + * @param int $retries [optional] Number of connection attempts. + * @param int[] $stop_on_codes [optional] + * If one of these HTTP codes is encountered, fetching stops early. + * @return ?string Fetched contents, if applicable. 
*/ - function curl_get_contents(string $url, $retries = 4, $stop_on_codes = [404]) { + function curl_get_contents(string $url, int $retries = 4, $stop_on_codes = [404]): ?string { return curl_get_response($url, retries: $retries, stop_on_codes: $stop_on_codes)[1]; } + /** + * Fetch URL repeatedly, downgrading to HTTP if needed. + * @param string $url URL to fetch. + * @param int $retries [optional] Number of connection attempts. + * @param int[] $stop_on_codes [optional] + * If one of these HTTP codes is encountered, fetching stops early. + * @param int[] $curlopts Associative array of options for `curl_setopt`. + * @return array Return code, fetched contents (if applicable), + * and whether a downgrade to HTTP took place. + * A code of 0 corresponds to an unreachable host. + */ function curl_get_response_downgrade( string $url, $retries = 4, $stop_on_codes = [404], $curlopts = [] ) { @@ -59,7 +97,7 @@ if ($retcode == 200) return [$retcode, $content, false]; log_debug("Downgrading to HTTP"); list($retcode, $content) = curl_get_response( - substr_replace($url, "http:", 0, strlen("https:")), + substr_replace($url, "http:", 0, strlen("https:")), ceil($retries / 2), $stop_on_codes, $curlopts ); @@ -73,7 +111,17 @@ // Low default retries value so this doesn't run for 30 minutes // FIXME: Does not seem to handle 308's, behaviour not transparent. // TODO: Parallelize & use in CommunityServer::poll_reachable() - function curl_get_response(string $url, $retries, $stop_on_codes = [404], $curlopts = []) { + /** + * Fetch URL repeatedly to obtain a valid response. + * @param string $url URL to fetch. + * @param int $retries Number of connection attempts. + * @param string[] $stop_on_codes [optional] + * If one of these HTTP codes is encountered, fetching stops early. + * @param int[] $curlopts Associative array of options for `curl_setopt`. + * @return array Return code and fetched contents, if applicable. A code of 0 corresponds + * to an unreachable host. 
+ */ + function curl_get_response(string $url, int $retries, $stop_on_codes = [404], $curlopts = []) { // use separate timeouts to reliably get data from Chinese server with repeated tries $connecttimeout = 2; // wait at most X seconds to connect $timeout = 3; // can't take longer than X seconds for the whole curl process @@ -85,7 +133,7 @@ for ($counter = 1; $counter <= $retries; $counter++) { $curl = curl_init($url); - + // curl_setopt($curl, CURLOPT_VERBOSE, true); curl_setopt($curl, CURLOPT_AUTOREFERER, true); @@ -111,7 +159,12 @@ } /** - * Returns the scheme, hostname and optional port of a URL. + * Returns the base path of a URL. + * @param string $url The URL to slice the path from. + * @param bool $include_scheme [optional] + * Includes the scheme. `true` by default. + * @return string A URL composed of the original scheme (unless specified), + * hostname, and port (if present). */ function url_get_base(string $url, bool $include_scheme = true) { $url_components = parse_url($url); @@ -128,6 +181,8 @@ /** * Extracts the room token from a join URL. + * @param string $join_url Join URL for Session Community. + * @return string Name of Community room. */ function url_get_token(string $join_url) { $token = parse_url($join_url)['path']; @@ -137,9 +192,10 @@ /** * Extracts join links that match $REGEX_JOIN_LINK. + * @param ?string $html Text to find join URLs in. * @return string[] Sorted array of unique server join links. */ - function parse_join_links($html){ + function parse_join_links(?string $html){ global $REGEX_JOIN_LINK; preg_match_all($REGEX_JOIN_LINK, $html, $match_result); $links = $match_result[0]; @@ -150,11 +206,20 @@ /** * Convert special characters to html entities. + * @param string $str String to sanitize + * @param int $flags [optional] + * A bitmask of one or more of the following flags, + * which specify how to handle quotes, invalid code unit sequences + * and the used document type. The default is ENT_COMPAT | ENT_HTML401. 
+ * @param bool $double_encode [optional] + * When double_encode is turned off, PHP will not encode + * existing HTML entities; the default is to convert everything. + * @return ?string The converted string, possibly empty. */ function html_sanitize( ?string $str, int $flags = ENT_QUOTES|ENT_SUBSTITUTE, ?string $encoding = null, bool $double_encode = true - ) { + ): ?string { if ($str == "") { return ""; } diff --git a/sites/+components/page-head.php b/sites/+components/page-head.php index 08537b40..59e3a63e 100644 --- a/sites/+components/page-head.php +++ b/sites/+components/page-head.php @@ -2,8 +2,8 @@ - -