crawlUrl($url); $parser = new HtmlParser($html); /** * Certaines données sont représentées sous 2 titres car les différents modèles utilisent des noms différents pour * des choses similaires * @todo fix it */ $data = [ 'title' => substr($parser->getTitle(), 0, 250), 'name' => substr($parser->getTitle(), 0, 250), 'excerpt' => $parser->getMeta('description'), 'description' => $parser->getMeta('description'), 'host' => $urlInfo['host'] ]; $richSchema = $parser->getRichSchema(); $listOfTypes = [ "NewsArticle", "Report", "ScholarlyArticle", "SocialMediaPosting", "TechArticle", "Article", "BlogPosting", ]; if ($richSchema !== false) { foreach ($richSchema as $schema) { if ( ! isset($schema['@type']) || !in_array($schema["@type"], $listOfTypes) ) { continue; } if (isset($schema['datePublished'])) { $date = new \DateTimeImmutable($schema['datePublished']); $data['date'] = $date->format('Y-m-d'); } if (isset($schema['articleBody'])) { $body = strip_tags($schema['articleBody']); $data['readingTime'] = floor(str_word_count($body) / self::WORD_READ_BY_MINUTES); } } } $data = array_map(function ($value) { return trim($value); }, $data); return $data; } }