|
| 1 | +<?php |
| 2 | + |
| 3 | +declare(strict_types=1); |
| 4 | +/** |
| 5 | + * @copyright Copyright (c) 2022 Julius Härtl <jus@bitgrid.net> |
| 6 | + * |
| 7 | + * @author Julius Härtl <jus@bitgrid.net> |
| 8 | + * @author Anupam Kumar <kyteinsky@gmail.com> |
| 9 | + * |
| 10 | + * @license GNU AGPL version 3 or any later version |
| 11 | + * |
| 12 | + * This program is free software: you can redistribute it and/or modify |
| 13 | + * it under the terms of the GNU Affero General Public License as |
| 14 | + * published by the Free Software Foundation, either version 3 of the |
| 15 | + * License, or (at your option) any later version. |
| 16 | + * |
| 17 | + * This program is distributed in the hope that it will be useful, |
| 18 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | + * GNU Affero General Public License for more details. |
| 21 | + * |
| 22 | + * You should have received a copy of the GNU Affero General Public License |
| 23 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 24 | + */ |
| 25 | + |
| 26 | +namespace OCP\Collaboration\Reference; |
| 27 | + |
| 28 | +use Fusonic\OpenGraph\Consumer; |
| 29 | +use GuzzleHttp\Exception\GuzzleException; |
| 30 | +use GuzzleHttp\Psr7\LimitStream; |
| 31 | +use GuzzleHttp\Psr7\Utils; |
| 32 | +use OC\Security\RateLimiting\Exception\RateLimitExceededException; |
| 33 | +use OC\Security\RateLimiting\Limiter; |
| 34 | +use OC\SystemConfig; |
| 35 | +use OCP\Files\AppData\IAppDataFactory; |
| 36 | +use OCP\Files\NotFoundException; |
| 37 | +use OCP\Http\Client\IClientService; |
| 38 | +use OCP\IRequest; |
| 39 | +use OCP\IURLGenerator; |
| 40 | +use OCP\IUserSession; |
| 41 | +use Psr\Log\LoggerInterface; |
| 42 | + |
/**
 * Reference provider that turns plain web links into rich references by
 * reading the OpenGraph metadata (title, description, first image) of the
 * linked HTML page.
 *
 * Every outgoing lookup is rate limited, and both the page and its preview
 * image are capped at MAX_CONTENT_LENGTH bytes.
 *
 * @since 29.0.0
 */
class LinkReferenceProvider implements IReferenceProvider {

	/**
	 * Upper bound, in bytes, for the linked page and for its preview image
	 */
	private const MAX_CONTENT_LENGTH = 5 * 1024 * 1024;

	/**
	 * Image MIME types that may be stored as a reference preview
	 */
	private const ALLOWED_CONTENT_TYPES = [
		'image/png',
		'image/jpg',
		'image/jpeg',
		'image/gif',
		'image/svg+xml',
		'image/webp'
	];

	/**
	 * @since 29.0.0
	 */
	public function __construct(
		private IClientService $clientService,
		private LoggerInterface $logger,
		private SystemConfig $systemConfig,
		private IAppDataFactory $appDataFactory,
		private IURLGenerator $urlGenerator,
		private Limiter $limiter,
		private IUserSession $userSession,
		private IRequest $request,
	) {
	}

	/**
	 * A text matches when the `reference_opengraph` system config value is
	 * exactly `true` (the default used here) and the text matches
	 * IURLGenerator::URL_REGEX.
	 *
	 * @inheritDoc
	 * @since 29.0.0
	 */
	public function matchReference(string $referenceText): bool {
		if ($this->systemConfig->getValue('reference_opengraph', true) !== true) {
			return false;
		}

		return (bool)preg_match(IURLGenerator::URL_REGEX, $referenceText);
	}

	/**
	 * Returns a Reference for every matching text. The reference is returned
	 * even when fetching the OpenGraph data fails; it is then simply left
	 * without the fields that could not be resolved.
	 *
	 * @inheritDoc
	 * @since 29.0.0
	 */
	public function resolveReference(string $referenceText): ?IReference {
		if (!$this->matchReference($referenceText)) {
			return null;
		}

		$reference = new Reference($referenceText);
		$this->fetchReference($reference);
		return $reference;
	}

	/**
	 * Populates the reference with OpenGraph data
	 *
	 * Steps: register the request with the rate limiter, issue a HEAD request
	 * to reject oversized or non-HTML targets early, download the page with a
	 * hard size cap, extract title/description, and finally download and store
	 * the first OpenGraph image if it has an allowed type and size.
	 * Any failure leaves the reference partially populated; nothing is thrown.
	 *
	 * @param Reference $reference
	 * @since 29.0.0
	 */
	private function fetchReference(Reference $reference): void {
		try {
			$user = $this->userSession->getUser();
			if ($user) {
				$this->limiter->registerUserRequest('opengraph', 10, 120, $user);
			} else {
				$this->limiter->registerAnonRequest('opengraph', 10, 120, $this->request->getRemoteAddress());
			}
		} catch (RateLimitExceededException $e) {
			$this->logger->debug('Skip resolving link because the open graph rate limit was exceeded', ['exception' => $e]);
			return;
		}

		$url = $reference->getId();
		$client = $this->clientService->newClient();
		try {
			$headResponse = $client->head($url, ['timeout' => 10]);
		} catch (\Exception $e) {
			$this->logger->debug('Failed to perform HEAD request to get target metadata', ['exception' => $e]);
			return;
		}

		$linkContentLength = $headResponse->getHeader('Content-Length');
		if (is_numeric($linkContentLength) && (int)$linkContentLength > self::MAX_CONTENT_LENGTH) {
			$this->logger->debug('Skip resolving links pointing to content length > 5 MiB');
			return;
		}

		// The media type must be exactly text/html, optionally followed by parameters
		$linkContentType = $headResponse->getHeader('Content-Type');
		if (!preg_match('/^text\/html\s*(;|$)/i', $linkContentType)) {
			$this->logger->debug('Skip resolving links pointing to content type that is not "text/html"');
			return;
		}

		try {
			// The HEAD Content-Length may be missing or wrong, so the download itself is capped.
			// 'stream' asks the client for a streamed body; readCappedBody() also copes with a plain string.
			$response = $client->get($url, ['timeout' => 10, 'stream' => true]);
			$responseBody = $this->readCappedBody($response->getBody());
		} catch (\Exception $e) {
			$this->logger->debug('Failed to fetch link for obtaining open graph data', ['exception' => $e]);
			return;
		}

		if ($responseBody === null) {
			$this->logger->debug('Skip resolving links pointing to content length > 5 MiB');
			return;
		}

		// OpenGraph handling
		$consumer = new Consumer();
		$consumer->useFallbackMode = true;
		$object = $consumer->loadHtml($responseBody);

		$reference->setUrl($url);

		if ($object->title) {
			$reference->setTitle($object->title);
		}

		if ($object->description) {
			$reference->setDescription($object->description);
		}

		if ($object->images) {
			try {
				$imageUrl = $object->images[0]->url;
				$host = parse_url($imageUrl, PHP_URL_HOST);
				if ($host === false || $host === null) {
					$this->logger->warning('Could not detect host of open graph image URI for ' . $url);
					return;
				}

				$response = $client->get($imageUrl, ['timeout' => 10, 'stream' => true]);

				// Compare only the bare media type: drop parameters such as "; charset=..." and ignore case
				$contentType = strtolower(trim(explode(';', $response->getHeader('Content-Type'), 2)[0]));
				if (!in_array($contentType, self::ALLOWED_CONTENT_TYPES, true)) {
					$this->logger->debug('Skip open graph image with unsupported content type for ' . $url);
					return;
				}

				$contentLength = $response->getHeader('Content-Length');
				if (is_numeric($contentLength) && (int)$contentLength > self::MAX_CONTENT_LENGTH) {
					$this->logger->debug('Skip open graph image with content length > 5 MiB for ' . $url);
					return;
				}

				// Content-Length is optional, so enforce the cap on the actual bytes as well.
				// An oversized image is skipped rather than stored truncated.
				$imageData = $this->readCappedBody($response->getBody());
				if ($imageData === null) {
					$this->logger->debug('Skip open graph image with content length > 5 MiB for ' . $url);
					return;
				}

				$appData = $this->appDataFactory->get('core');
				try {
					$folder = $appData->getFolder('opengraph');
				} catch (NotFoundException $e) {
					$folder = $appData->newFolder('opengraph');
				}

				// The stored file name and the preview route parameter are the same md5 of the reference id
				$fileName = md5($url);
				$folder->newFile($fileName, $imageData);
				$reference->setImageContentType($contentType);
				$reference->setImageUrl($this->urlGenerator->linkToRouteAbsolute('core.Reference.preview', ['referenceId' => $fileName]));
			} catch (GuzzleException $e) {
				$this->logger->info('Failed to fetch and store the open graph image for ' . $url, ['exception' => $e]);
			} catch (\Throwable $e) {
				$this->logger->error('Failed to fetch and store the open graph image for ' . $url, ['exception' => $e]);
			}
		}
	}

	/**
	 * Reads at most MAX_CONTENT_LENGTH + 1 bytes from a response body and
	 * reports whether the body fits within the limit.
	 *
	 * @param string|resource $body Body as handed out by the HTTP client response
	 * @return string|null The complete body, or null when it exceeds MAX_CONTENT_LENGTH
	 */
	private function readCappedBody($body): ?string {
		// One byte more than the limit is requested so that a body of exactly
		// MAX_CONTENT_LENGTH bytes can be told apart from a larger one.
		$limited = new LimitStream(Utils::streamFor($body), self::MAX_CONTENT_LENGTH + 1, 0);
		try {
			$contents = $limited->getContents();
		} finally {
			// Closing the decorator also closes the wrapped stream
			$limited->close();
		}

		return strlen($contents) > self::MAX_CONTENT_LENGTH ? null : $contents;
	}

	/**
	 * Uses the reference id itself, unchanged, as the cache prefix.
	 *
	 * @inheritDoc
	 * @since 29.0.0
	 */
	public function getCachePrefix(string $referenceId): string {
		return $referenceId;
	}

	/**
	 * Always returns null.
	 * NOTE(review): presumably this means cached entries are not scoped any
	 * further (e.g. per user) - confirm against the IReferenceProvider contract.
	 *
	 * @inheritDoc
	 * @since 29.0.0
	 */
	public function getCacheKey(string $referenceId): ?string {
		return null;
	}
}
0 commit comments