You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
sessioncommunities.online/php/utils/sources.php

318 lines
9.8 KiB
PHP

<?php
require_once 'utils.php';
require_once 'tags.php';
/**
 * Tag source backed by the JSON room listing served by session.directory.
 *
 * The contents are expected to decode to a non-empty JSON array of room
 * entries, each carrying a string 'url' and a string 'tags' holding a
 * comma-separated tag list. Entries that do not match are logged and skipped.
 */
class SDIRCommunitySource {
    private readonly string $contents;

    /**
     * Initialised to an empty array so that get_tags() is safe to call even
     * when no entry passed validation.
     * @var string[][] $tags Array associating room IDs with string tag arrays.
     */
    private array $tags = [];

    private function __construct(string $contents) {
        $this->contents = $contents;
    }

    /**
     * Create new instance of this source from contents.
     * Returns false if processing the source fails.
     * @param string $contents Raw JSON text of the listing.
     * @return \SDIRCommunitySource|false
     */
    public static function from_contents(string $contents) {
        $source = new SDIRCommunitySource($contents);
        if (!$source->sdir_process_tags()) {
            return false;
        }
        return $source;
    }

    /**
     * Check that a decoded room entry has string 'url' and 'tags' fields.
     * Invalid entries are passed to log_value() and the matching flag is
     * raised so the caller can report each kind of problem once.
     * @param mixed $room_entry Decoded entry; anything that is not an array
     * is treated as an entry without a URL.
     * @param bool $missing_url Set to true when the URL is absent or not a string.
     * @param bool $missing_tags Set to true when the tags are absent or not a string.
     * @return bool True if the entry can be used.
     */
    private static function sdir_validate_entry(
        mixed $room_entry,
        bool &$missing_url,
        bool &$missing_tags
    ): bool {
        if (
            !is_array($room_entry)
            || !isset($room_entry['url'])
            || !is_string($room_entry['url'])
        ) {
            log_value($room_entry);
            $missing_url = true;
            return false;
        }
        if (!isset($room_entry['tags']) || !is_string($room_entry['tags'])) {
            log_value($room_entry);
            $missing_tags = true;
            return false;
        }
        return true;
    }

    /**
     * Log one summary error per kind of invalid entry encountered.
     */
    private static function sdir_report_errors(bool $entry_missing_url, bool $entry_missing_tags): void {
        if ($entry_missing_url) {
            log_error("One or more room entries from session.directory is missing the 'url' parameter.");
        }
        if ($entry_missing_tags) {
            log_error("One or more room entries from session.directory is missing the 'tags' parameter.");
        }
    }

    /**
     * Decode the stored contents as JSON.
     * @return array|false Decoded array, or false when the contents are not
     * valid JSON or do not decode to an array (e.g. `null` or a bare scalar).
     */
    private function get_sdir_entries(): array|false {
        try {
            $decoded = json_decode($this->contents, true, 512, JSON_THROW_ON_ERROR);
        } catch (JsonException) {
            return false;
        }
        // Valid JSON is not necessarily a list of entries.
        return is_array($decoded) ? $decoded : false;
    }

    /**
     * Populate the room ID => tags map from the stored contents.
     * @return bool False when the contents cannot be decoded or hold no
     * entries; true otherwise, even if individual entries were skipped.
     */
    private function sdir_process_tags(): bool {
        $entry_missing_url = false;
        $entry_missing_tags = false;

        $rooms = $this->get_sdir_entries();
        // An empty listing is treated the same as an unparseable one.
        if (!$rooms) {
            log_error("Could not parse entries from session.directory.");
            return false;
        }

        foreach ($rooms as $room_entry) {
            if (!self::sdir_validate_entry(
                $room_entry, $entry_missing_url, $entry_missing_tags
            )) {
                continue;
            }
            $room_id = url_get_room_id($room_entry['url']);
            $this->tags[$room_id] = explode(',', $room_entry['tags']);
        }

        self::sdir_report_errors($entry_missing_url, $entry_missing_tags);
        return true;
    }

    /**
     * @return string[][] Array associating room IDs with string tag arrays.
     */
    public function get_tags(): array {
        return $this->tags;
    }
}
/**
 * Tag source that reads room tags out of line-oriented text.
 *
 * A line consisting of exactly one join link selects the current room. A
 * later line starting with "hashtag" supplies that room's tags, written as
 * backtick-quoted `#tag` tokens. A blank line clears the current room.
 */
class ASGLCommunitySource {
    private readonly string $contents;

    /**
     * @var string[][] $tags Array associating room IDs with string tag arrays.
     */
    private array $tags = [];

    private function __construct(string $contents) {
        $this->contents = $contents;
    }

    /**
     * Build a source from raw text and extract its tags.
     * @return \ASGLCommunitySource|false
     */
    public static function from_contents(string $contents) {
        $instance = new self($contents);
        return $instance->asgl_process_tags() ? $instance : false;
    }

    /**
     * @return string[][] Array associating room IDs with string tag arrays.
     */
    public function get_tags(): array {
        return $this->tags;
    }

    /**
     * Walk the contents line by line and record tags for each room.
     * @return bool Always true.
     */
    private function asgl_process_tags(): bool {
        $current_room = null;
        $current_tags = [];

        foreach (explode("\n", $this->contents) as $raw_line) {
            self::asgl_process_line($raw_line, $current_room, $current_tags);

            // Nothing to store until both a room and its tags are known.
            if ($current_room == null || count($current_tags) === 0) {
                continue;
            }

            $this->tags[$current_room] = $current_tags;
            $current_room = null;
            $current_tags = [];
        }

        return true;
    }

    /**
     * Update the parser state from a single line.
     * @param ?string $line Line to inspect.
     * @param ?string $last_room_id Room the next tag line applies to; updated in place.
     * @param array $room_tags Tags collected for that room; updated in place.
     */
    private static function asgl_process_line(
        ?string $line,
        ?string &$last_room_id,
        array &$room_tags
    ) {
        $line = trim($line);

        // A blank line ends the current room's section.
        if ($line === '') {
            $last_room_id = null;
            return;
        }

        // A line that is nothing but a single join link starts a new room.
        $links = parse_join_links($line);
        $is_bare_link = count($links) === 1 && $links[0] == $line;
        if ($is_bare_link) {
            $last_room_id = url_get_room_id($links[0]);
            $room_tags = [];
            return;
        }

        if (str_starts_with($line, "hashtag")) {
            $room_tags = self::read_asgl_tags($line);
        }
    }

    /**
     * Extract every backtick-quoted `#tag` token from a line.
     * @return string[] Tag names without the leading '#'.
     */
    private static function read_asgl_tags(string $line): array {
        $captures = [];
        preg_match_all('/`#([^`]+)`/', $line, $captures);
        // Index 1 holds the first capture group of every match.
        return $captures[1];
    }
}
/**
 * Fetches the known lists of Session Community join links and collects the
 * raw tags that some of those lists attach to rooms.
 */
class CommunitySources {
    private const SOURCES = [
        'ASGL' => 'https://raw.githubusercontent.com/GNU-Linux-libre/Awesome-Session-Group-List/main/README.md',
        'LOKI' => 'https://lokilocker.com/Mods/Session-Groups/wiki/Session-Open-Groups',
        'SDIR' => 'https://session.directory/?all=groups',
        'SDIR-BASE' => 'https://session.directory/',
        'SDIR-PATTERN' => '/view_session_group_user_lokinet\.php\?id=\d+/',
        'SDIR-JSON' => 'https://session.directory/scrape.php',
        'FARK' => 'https://freearkham.cc/'
    ];

    private readonly string $contents_asgl;
    private readonly string $contents_loki;
    private readonly string $contents_sdir;
    private readonly string $contents_fark;
    private readonly string $contents_aggregated;

    /**
     * Array associating room identifiers with arrays of raw tags.
     * @var array<string,string[]> $room_tags
     */
    private array $room_tags = [];

    /**
     * Fetches and saves known sources of Session Community join links.
     * A source that cannot be fetched contributes an empty string; a source
     * whose tags cannot be parsed is logged and otherwise ignored.
     */
    public function __construct() {
        log_info("Requesting Awesome Session Group list...");
        $this->contents_asgl = self::fetch_source('ASGL');

        log_info("Requesting Lokilocker Mods Open Group list...");
        $this->contents_loki = self::fetch_source('LOKI');

        log_info("Requesting session.directory list...");
        $this->contents_sdir = self::fetch_source('SDIR-JSON');

        log_info("Requesting FreeArkham.cc list...");
        $this->contents_fark = self::fetch_source('FARK');

        log_info("Parsing extra information...");
        if (!$this->process_sources()) {
            log_error("Could not parse extra information from one or more sources.");
        }

        log_info('Done fetching sources.');

        $this->contents_aggregated =
            $this->contents_asgl .
            $this->contents_fark .
            $this->contents_loki .
            // Slashes are escaped when served, unescape them
            str_replace("\\/", "/", $this->contents_sdir);
    }

    /**
     * Download one of the configured sources.
     * @param string $source_key Key into self::SOURCES.
     * @return string Response body, or an empty string if nothing was fetched.
     */
    private static function fetch_source(string $source_key): string {
        $url = self::SOURCES[$source_key];
        $contents = file_get_contents($url);

        // The HTTP wrapper only defines $http_response_header once a response
        // has been received; it is absent when the request fails earlier.
        if (isset($http_response_header[0])) {
            log_debug($http_response_header[0]);
        }

        if ($contents === false || $contents === '') {
            log_error("Could not fetch source from $url.");
            return "";
        }
        return $contents;
    }

    /**
     * Append the given tags to those already known for each room.
     * @param string[][] $tags Array associating room IDs to tag arrays
     */
    private function add_tags(array $tags): void {
        foreach ($tags as $room_id => $room_tags) {
            $this->room_tags[$room_id] = [
                ...($this->room_tags[$room_id] ?? []),
                ...$room_tags
            ];
        }
    }

    /**
     * Parse tags from the sources that provide them and merge the results.
     * Tags from a source that parsed successfully are kept even when the
     * other source fails.
     * @return bool True only if every tag source was parsed.
     */
    private function process_sources(): bool {
        $source_sdir = SDIRCommunitySource::from_contents($this->contents_sdir);
        $source_asgl = ASGLCommunitySource::from_contents($this->contents_asgl);

        if ($source_sdir !== false) {
            $this->add_tags($source_sdir->get_tags());
        }
        if ($source_asgl !== false) {
            $this->add_tags($source_asgl->get_tags());
        }

        return $source_sdir !== false && $source_asgl !== false;
    }

    /**
     * Returns all join URLs found.
     * @return string[] Join URLs.
     */
    public function get_join_urls(): array {
        return array_unique(
            parse_join_links($this->contents_aggregated)
        );
    }

    /**
     * Return known tags for the given room.
     * @param string $room_id Room identifier.
     * @return string[] Raw string tags; empty if none are known for the room.
     */
    public function get_room_tags($room_id): array {
        return $this->room_tags[$room_id] ?? [];
    }
}
?>