From ae8feea18ec10e8baf1d232cb33f724ac2932f65 Mon Sep 17 00:00:00 2001 From: LordArrin <79581469+LordArrin@users.noreply.github.com> Date: Fri, 26 Jun 2026 02:49:03 +0300 Subject: [PATCH 1/5] Create PanoramaBridge.php --- bridges/PanoramaBridge.php | 213 +++++++++++++++++++++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 bridges/PanoramaBridge.php diff --git a/bridges/PanoramaBridge.php b/bridges/PanoramaBridge.php new file mode 100644 index 00000000000..6533077341f --- /dev/null +++ b/bridges/PanoramaBridge.php @@ -0,0 +1,213 @@ +getDatesToFetch(); + $processedUris = []; + + foreach ($dates as $date) { + $url = self::URI . '/news/' . $date; + + $htmlContent = $this->fetchPageContent($url); + if ($htmlContent === null) { + continue; + } + + $html = str_get_html($htmlContent); + $html = defaultLinkTo($html, self::URI); + $cards = $html->find('a.flex-col'); + + foreach ($cards as $card) { + $uri = $card->href; + $path = parse_url($uri, PHP_URL_PATH); + + if (!$this->isValidNewsUri($path, $processedUris)) { + continue; + } + + $processedUris[] = $uri; + $item = $this->processNewsCard($card, $uri); + + if ($item !== null) { + $this->items[] = $item; + } + + usleep(800000); // 800ms delay between requests + } + } + } + + private function fetchPageContent(string $url): ?string { + try { + $headers = [ + 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36' + ]; + return getContents($url, $headers); + } catch (Exception $e) { + return null; + } + } + + private function getDatesToFetch(): array { + $tzString = date_default_timezone_get(); + $timezone = new DateTimeZone($tzString); + + $today = new DateTime('now', $timezone); + $yesterday = new DateTime('now', $timezone); + $yesterday->modify('-1 day'); + + return [ + $today->format('d-m-Y'), + $yesterday->format('d-m-Y') + ]; + } + + private function isValidNewsUri(?string $path, array $processedUris): bool { + if ($path === null) { + return false; + } + + if (preg_match('/^\/news\/\d{2}-\d{2}-\d{4}$/', $path)) { + return false; + } + + if ($path === '/news' || $path === '/news/') { + return false; + } + + if (in_array($path, $processedUris)) { + return false; + } + + return true; + } + + private function processNewsCard($card, string $uri): ?array { + $previewTitle = $this->extractPreviewTitle($card); + $previewImage = $this->extractPreviewImage($card); + + $articleContent = $this->fetchPageContent($uri); + if ($articleContent === null) { + return null; + } + + $articleHTML = str_get_html($articleContent); + $articleHTML = defaultLinkTo($articleHTML, self::URI); + + $item = [ + 'uri' => $uri, + 'uid' => $uri, + 'title' => $this->extractTitle($articleHTML, $previewTitle), + 'timestamp' => $this->extractTimestamp($articleHTML), + 'author' => $this->extractAuthor($articleHTML), + 'content' => $this->buildFinalContent( + $this->extractImage($articleHTML, $previewImage), + $this->extractContent($articleHTML, $previewTitle), + $this->extractTitle($articleHTML, $previewTitle) + ) + ]; + + return $item; + } + + private function extractPreviewTitle($card): string { + $titleDiv = $card->find('div.font-semibold', 0); + return $titleDiv ? trim($titleDiv->plaintext) : ''; + } + + private function extractPreviewImage($card): string { + $imgTag = $card->find('img', 0); + if (!$imgTag) { + return ''; + } + + $src = $imgTag->src; + if (strpos($src, '//') === 0) { + return 'https:' . $src; + } + + return $src; + } + + private function extractTitle($articleHTML, string $fallbackTitle): string { + $h1 = $articleHTML->find('h1[itemprop=headline]', 0); + if ($h1) { + return trim($h1->plaintext); + } + + $ogTitle = $articleHTML->find('meta[property="og:title"]', 0); + return $ogTitle ? trim($ogTitle->content) : $fallbackTitle; + } + + private function extractTimestamp($articleHTML): int { + $publishedTime = $articleHTML->find('meta[property="article:published_time"]', 0); + return $publishedTime ? strtotime($publishedTime->content) : time(); + } + + private function extractAuthor($articleHTML): string { + $authorTag = $articleHTML->find('meta[property="article:author"]', 0); + return $authorTag ? $authorTag->content : 'IA Panorama'; + } + + private function extractImage($articleHTML, string $fallbackImage): string { + $ogImage = $articleHTML->find('meta[property="og:image"]', 0); + $imageUrl = $ogImage ? trim($ogImage->content) : $fallbackImage; + + if (strpos($imageUrl, '//') === 0) { + return 'https:' . $imageUrl; + } + + return $imageUrl; + } + + private function extractContent($articleHTML, string $fallbackDescription): string { + $contentElem = $articleHTML->find('div[itemprop=articleBody]', 0); + if (!$contentElem) { + $contentElem = $articleHTML->find('.entry-contents', 0); + } + + if ($contentElem) { + $this->cleanContent($contentElem); + return $contentElem->innertext; + } + + $ogDesc = $articleHTML->find('meta[property="og:description"]', 0); + $description = $ogDesc ? trim($ogDesc->content) : $fallbackDescription; + return '

' . htmlspecialchars($description) . '

'; + } + + private function cleanContent($contentElem): void { + $junkSelectors = [ + 'script', + 'style', + 'div[id*=yandex_rtb]', + '.sharethis-inline-share-buttons', + '.alert' + ]; + + foreach($contentElem->find(implode(',', $junkSelectors)) as $junk) { + $junk->outertext = ''; + } + } + + private function buildFinalContent(string $imageUrl, string $content, string $title): string { + $finalContent = ''; + + if (!empty($imageUrl)) { + $finalContent .= '
' . htmlspecialchars($title) . '

'; + } + + $finalContent .= $content; + + return $finalContent; + } +} \ No newline at end of file From aac5a7131e4df3fa805e2f9c67934eda339708b3 Mon Sep 17 00:00:00 2001 From: LordArrin <79581469+LordArrin@users.noreply.github.com> Date: Fri, 26 Jun 2026 03:10:29 +0300 Subject: [PATCH 2/5] Update PanoramaBridge.php --- bridges/PanoramaBridge.php | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bridges/PanoramaBridge.php b/bridges/PanoramaBridge.php index 6533077341f..b1c3566081f 100644 --- a/bridges/PanoramaBridge.php +++ b/bridges/PanoramaBridge.php @@ -9,6 +9,8 @@ class PanoramaBridge extends BridgeAbstract { const DESCRIPTION = 'News feed of the Russian satirical information agency "Panorama"'; const CACHE_TIMEOUT = 3600; + const PARAMETERS = []; + public function collectData(): void { $dates = $this->getDatesToFetch(); $processedUris = []; @@ -95,12 +97,16 @@ private function processNewsCard($card, string $uri): ?array { $previewTitle = $this->extractPreviewTitle($card); $previewImage = $this->extractPreviewImage($card); - $articleContent = $this->fetchPageContent($uri); - if ($articleContent === null) { + try { + $articleHTML = getSimpleHTMLDOMCached($uri, 86400); + } catch (Exception $e) { + return null; + } + + if (!$articleHTML) { return null; } - $articleHTML = str_get_html($articleContent); $articleHTML = defaultLinkTo($articleHTML, self::URI); $item = [ From 79735b79cdda15756100ecc81b6b860cc08f96f3 Mon Sep 17 00:00:00 2001 From: LordArrin <79581469+LordArrin@users.noreply.github.com> Date: Fri, 26 Jun 2026 03:24:10 +0300 Subject: [PATCH 3/5] Update PanoramaBridge.php --- bridges/PanoramaBridge.php | 58 ++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/bridges/PanoramaBridge.php b/bridges/PanoramaBridge.php index b1c3566081f..f25bddafd5d 100644 --- a/bridges/PanoramaBridge.php +++ b/bridges/PanoramaBridge.php @@ -18,12 +18,12 @@ public function collectData(): void { foreach ($dates as $date) { $url = self::URI . '/news/' . $date; - $htmlContent = $this->fetchPageContent($url); - if ($htmlContent === null) { + try { + $html = getSimpleHTMLDOM($url); + } catch (Exception $e) { continue; } - $html = str_get_html($htmlContent); $html = defaultLinkTo($html, self::URI); $cards = $html->find('a.flex-col'); @@ -36,26 +36,24 @@ public function collectData(): void { } $processedUris[] = $uri; + + $cachedItem = $this->loadCacheValue($uri); + if ($cachedItem !== null) { + $this->items[] = $cachedItem; + continue; + } + $item = $this->processNewsCard($card, $uri); if ($item !== null) { $this->items[] = $item; + $this->saveCacheValue($uri, $item, 604800); // 7 days } usleep(800000); // 800ms delay between requests } - } - } - - private function fetchPageContent(string $url): ?string { - try { - $headers = [ - 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36' - ]; - return getContents($url, $headers); - } catch (Exception $e) { - return null; + + usleep(800000); // delay between date pages } } @@ -109,7 +107,7 @@ private function processNewsCard($card, string $uri): ?array { $articleHTML = defaultLinkTo($articleHTML, self::URI); - $item = [ + return [ 'uri' => $uri, 'uid' => $uri, 'title' => $this->extractTitle($articleHTML, $previewTitle), @@ -121,8 +119,6 @@ private function processNewsCard($card, string $uri): ?array { $this->extractTitle($articleHTML, $previewTitle) ) ]; - - return $item; } private function extractPreviewTitle($card): string { @@ -182,7 +178,17 @@ private function extractContent($articleHTML, string $fallbackDescription): stri } if ($contentElem) { - $this->cleanContent($contentElem); + $junkSelectors = [ + 'script', + 'style', + 'div[id*=yandex_rtb]', + '.sharethis-inline-share-buttons', + '.alert' + ]; + + foreach($contentElem->find(implode(',', $junkSelectors)) as $junk) { + $junk->outertext = ''; + } return $contentElem->innertext; } @@ -191,20 +197,6 @@ private function extractContent($articleHTML, string $fallbackDescription): stri return '

' . htmlspecialchars($description) . '

'; } - private function cleanContent($contentElem): void { - $junkSelectors = [ - 'script', - 'style', - 'div[id*=yandex_rtb]', - '.sharethis-inline-share-buttons', - '.alert' - ]; - - foreach($contentElem->find(implode(',', $junkSelectors)) as $junk) { - $junk->outertext = ''; - } - } - private function buildFinalContent(string $imageUrl, string $content, string $title): string { $finalContent = ''; From 0e6124da5bd31dacb847887bf1dea96eede47686 Mon Sep 17 00:00:00 2001 From: LordArrin <79581469+LordArrin@users.noreply.github.com> Date: Fri, 26 Jun 2026 03:33:30 +0300 Subject: [PATCH 4/5] fix for angery checker --- bridges/PanoramaBridge.php | 107 ++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 48 deletions(-) diff --git a/bridges/PanoramaBridge.php b/bridges/PanoramaBridge.php index f25bddafd5d..30cc9be81c7 100644 --- a/bridges/PanoramaBridge.php +++ b/bridges/PanoramaBridge.php @@ -8,105 +8,108 @@ class PanoramaBridge extends BridgeAbstract { const URI = 'https://panorama.pub'; const DESCRIPTION = 'News feed of the Russian satirical information agency "Panorama"'; const CACHE_TIMEOUT = 3600; - const PARAMETERS = []; - public function collectData(): void { + public function collectData(): void + { $dates = $this->getDatesToFetch(); $processedUris = []; foreach ($dates as $date) { $url = self::URI . '/news/' . $date; - + try { $html = getSimpleHTMLDOM($url); } catch (Exception $e) { continue; } - + $html = defaultLinkTo($html, self::URI); $cards = $html->find('a.flex-col'); foreach ($cards as $card) { $uri = $card->href; $path = parse_url($uri, PHP_URL_PATH); - + if (!$this->isValidNewsUri($path, $processedUris)) { continue; } - + $processedUris[] = $uri; - + $cachedItem = $this->loadCacheValue($uri); if ($cachedItem !== null) { $this->items[] = $cachedItem; continue; } - + $item = $this->processNewsCard($card, $uri); - + if ($item !== null) { $this->items[] = $item; - $this->saveCacheValue($uri, $item, 604800); // 7 days + $this->saveCacheValue($uri, $item, 604800); } - - usleep(800000); // 800ms delay between requests + + usleep(800000); } - - usleep(800000); // delay between date pages + + usleep(800000); } } - private function getDatesToFetch(): array { + private function getDatesToFetch(): array + { $tzString = date_default_timezone_get(); $timezone = new DateTimeZone($tzString); - + $today = new DateTime('now', $timezone); $yesterday = new DateTime('now', $timezone); $yesterday->modify('-1 day'); - + return [ $today->format('d-m-Y'), $yesterday->format('d-m-Y') ]; } - private function isValidNewsUri(?string $path, array $processedUris): bool { + private function isValidNewsUri(?string $path, array $processedUris): bool + { if ($path === null) { return false; } - + if (preg_match('/^\/news\/\d{2}-\d{2}-\d{4}$/', $path)) { return false; } - + if ($path === '/news' || $path === '/news/') { return false; } - + if (in_array($path, $processedUris)) { return false; } - + return true; } - private function processNewsCard($card, string $uri): ?array { + private function processNewsCard($card, string $uri): ?array + { $previewTitle = $this->extractPreviewTitle($card); $previewImage = $this->extractPreviewImage($card); - + try { $articleHTML = getSimpleHTMLDOMCached($uri, 86400); } catch (Exception $e) { return null; } - + if (!$articleHTML) { return null; } - + $articleHTML = defaultLinkTo($articleHTML, self::URI); - + return [ 'uri' => $uri, 'uid' => $uri, @@ -121,57 +124,64 @@ private function processNewsCard($card, string $uri): ?array { ]; } - private function extractPreviewTitle($card): string { + private function extractPreviewTitle($card): string + { $titleDiv = $card->find('div.font-semibold', 0); return $titleDiv ? trim($titleDiv->plaintext) : ''; } - private function extractPreviewImage($card): string { + private function extractPreviewImage($card): string + { $imgTag = $card->find('img', 0); if (!$imgTag) { return ''; } - + $src = $imgTag->src; if (strpos($src, '//') === 0) { return 'https:' . $src; } - + return $src; } - private function extractTitle($articleHTML, string $fallbackTitle): string { + private function extractTitle($articleHTML, string $fallbackTitle): string + { $h1 = $articleHTML->find('h1[itemprop=headline]', 0); if ($h1) { return trim($h1->plaintext); } - + $ogTitle = $articleHTML->find('meta[property="og:title"]', 0); return $ogTitle ? trim($ogTitle->content) : $fallbackTitle; } - private function extractTimestamp($articleHTML): int { + private function extractTimestamp($articleHTML): int + { $publishedTime = $articleHTML->find('meta[property="article:published_time"]', 0); return $publishedTime ? strtotime($publishedTime->content) : time(); } - private function extractAuthor($articleHTML): string { + private function extractAuthor($articleHTML): string + { $authorTag = $articleHTML->find('meta[property="article:author"]', 0); return $authorTag ? $authorTag->content : 'IA Panorama'; } - private function extractImage($articleHTML, string $fallbackImage): string { + private function extractImage($articleHTML, string $fallbackImage): string + { $ogImage = $articleHTML->find('meta[property="og:image"]', 0); $imageUrl = $ogImage ? trim($ogImage->content) : $fallbackImage; - + if (strpos($imageUrl, '//') === 0) { return 'https:' . $imageUrl; } - + return $imageUrl; } - private function extractContent($articleHTML, string $fallbackDescription): string { + private function extractContent($articleHTML, string $fallbackDescription): string + { $contentElem = $articleHTML->find('div[itemprop=articleBody]', 0); if (!$contentElem) { $contentElem = $articleHTML->find('.entry-contents', 0); @@ -179,33 +189,34 @@ private function extractContent($articleHTML, string $fallbackDescription): stri if ($contentElem) { $junkSelectors = [ - 'script', - 'style', + 'script', + 'style', 'div[id*=yandex_rtb]', '.sharethis-inline-share-buttons', '.alert' ]; - - foreach($contentElem->find(implode(',', $junkSelectors)) as $junk) { + + foreach ($contentElem->find(implode(',', $junkSelectors)) as $junk) { $junk->outertext = ''; } return $contentElem->innertext; } - + $ogDesc = $articleHTML->find('meta[property="og:description"]', 0); $description = $ogDesc ? trim($ogDesc->content) : $fallbackDescription; return '

' . htmlspecialchars($description) . '

'; } - private function buildFinalContent(string $imageUrl, string $content, string $title): string { + private function buildFinalContent(string $imageUrl, string $content, string $title): string + { $finalContent = ''; - + if (!empty($imageUrl)) { $finalContent .= '
' . htmlspecialchars($title) . '

'; } - + $finalContent .= $content; - + return $finalContent; } } \ No newline at end of file From 1beea18cf0390879560265108dcef10af411bc5d Mon Sep 17 00:00:00 2001 From: LordArrin <79581469+LordArrin@users.noreply.github.com> Date: Fri, 26 Jun 2026 03:36:30 +0300 Subject: [PATCH 5/5] fix for fix for angry checker --- bridges/PanoramaBridge.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bridges/PanoramaBridge.php b/bridges/PanoramaBridge.php index 30cc9be81c7..f75d43f79fd 100644 --- a/bridges/PanoramaBridge.php +++ b/bridges/PanoramaBridge.php @@ -2,7 +2,8 @@ declare(strict_types=1); -class PanoramaBridge extends BridgeAbstract { +class PanoramaBridge extends BridgeAbstract +{ const MAINTAINER = 'LordArrin'; const NAME = 'IA Panorama'; const URI = 'https://panorama.pub';