$data
*/
private function parse_hcard(array $data, bool $category = false): string
{
    // Turn one parsed microformats item into the text used for an author or
    // a category entry.
    //
    // $data     - parsed microformats item; the keys read here are 'type',
    //             'properties' ('name' and 'url') and 'value'.
    // $category - true when the result is destined for the category list.
    //
    // Returns the h-card's name (or its url when it has no name) when the
    // item is an h-card with a url; otherwise the item's plain 'value', or
    // '' when there is none.

    // Property values are either plain strings or arrays that carry their
    // text under 'value'; reduce both shapes to a string so that the string
    // functions below never receive an array.
    $as_text = static function ($value): string {
        if (is_array($value)) {
            $value = $value['value'] ?? '';
        }
        return is_scalar($value) ? (string) $value : '';
    };

    $types = $data['type'] ?? [];
    $is_hcard = is_array($types) && in_array('h-card', $types, true);

    // Check if h-card is set and pass that information on in the link.
    if ($is_hcard && isset($data['properties']['url'][0])) {
        $link = $as_text($data['properties']['url'][0]);
        $name = $as_text($data['properties']['name'][0] ?? '');
        if ($name === '') {
            // No usable name: fall back to the url itself.
            $name = $link;
        } else {
            // can't have commas in categories.
            $name = str_replace(',', '', $name);
        }
        // NOTE(review): both alternatives are the empty string here, so
        // $category currently has no effect on the result - confirm that
        // this is the intended output.
        $person_tag = $category ? '' : '';
        return ''.$person_tag.$name.'';
    }

    // Not an h-card with a url: use the item's plain value, if any.
    return $as_text($data['value'] ?? '');
}
/**
 * Build an RSS-shaped data tree from the microformats found in a document
 * and store it in $this->data.
 *
 * Entries are taken, in order of preference, from an h-feed (either a top
 * level item or the first child of a top level item), from the children of
 * a top level item whose first child is an h-entry, or from the top level
 * items themselves. Every h-entry among them becomes one item.
 *
 * @param string $data Document source handed to \Mf2\parse(); it is also
 *                     searched for a title when no feed name was found.
 *                     Taken by reference but not assigned to in this method.
 * @param string $url  Passed to \Mf2\parse() and used as the channel link.
 * @return true
 */
private function parse_microformats(string &$data, string $url): bool
{
    // For PHPStan, we already check that in call site.
    \assert(function_exists('Mf2\parse'));
    \assert(function_exists('Mf2\fetch'));

    // True when $item is a parsed microformats item carrying $type.
    $has_type = static function ($item, string $type): bool {
        return is_array($item)
            && isset($item['type'])
            && is_array($item['type'])
            && in_array($type, $item['type'], true);
    };
    // Property values are either plain values or arrays carrying the text
    // under 'value'; return the text in the latter case.
    $unwrap = static function ($value) {
        return (is_array($value) && isset($value['value'])) ? $value['value'] : $value;
    };

    $feed_title = '';
    $feed_author = null;
    $author_cache = [];
    $items = [];
    $entries = [];
    $mf = \Mf2\parse($data, $url);
    $top_level = $mf['items'] ?? [];

    // First look for an h-feed.
    $h_feed = [];
    foreach ($top_level as $mf_item) {
        if ($has_type($mf_item, 'h-feed')) {
            $h_feed = $mf_item;
            break;
        }
        // Also look for h-feed or h-entry in the children of each top level item.
        if (!isset($mf_item['children'][0]['type'])) {
            continue;
        }
        $first_child = $mf_item['children'][0];
        $child_is_feed = $has_type($first_child, 'h-feed');
        if (!$child_is_feed && !$has_type($first_child, 'h-entry')) {
            continue;
        }
        if ($child_is_feed) {
            $h_feed = $first_child;
        } else {
            $entries = $mf_item['children'];
        }
        // In both cases the parent may be an h-card, so use it as the
        // feed_author.
        if ($has_type($mf_item, 'h-card')) {
            $feed_author = $mf_item;
        }
        break;
    }

    if (isset($h_feed['children'])) {
        $entries = $h_feed['children'];
        // Also set the feed title and store author from the h-feed if available.
        // NOTE(review): these are read from the first top level item, which
        // is not necessarily the h-feed selected above - confirm intended.
        if (isset($mf['items'][0]['properties']['name'][0])) {
            $feed_title = $mf['items'][0]['properties']['name'][0];
        }
        if (isset($mf['items'][0]['properties']['author'][0])) {
            $feed_author = $mf['items'][0]['properties']['author'][0];
        }
    } elseif (count($entries) === 0) {
        $entries = $top_level;
    }

    foreach ($entries as $entry) {
        if (!$has_type($entry, 'h-entry')) {
            continue;
        }
        $props = $entry['properties'] ?? [];
        $item = [];
        $title = '';
        $description = '';

        if (isset($props['url'][0])) {
            $item['link'] = [['data' => $unwrap($props['url'][0])]];
        }
        if (isset($props['uid'][0])) {
            $item['guid'] = [['data' => $unwrap($props['uid'][0])]];
        }
        if (isset($props['name'][0])) {
            $title = $unwrap($props['name'][0]);
            $item['title'] = [['data' => $title]];
        }

        if (isset($props['author'][0]) || isset($feed_author)) {
            // author is a special case, it can be plain text or an h-card array.
            // If it's plain text it can also be a url that should be followed to
            // get the actual h-card.
            $author = $props['author'][0] ?? $feed_author;
            if (!is_string($author)) {
                $author = $this->parse_hcard($author);
            } elseif (strpos($author, 'http') === 0) {
                $author_url = $author;
                if (!isset($author_cache[$author_url])) {
                    // Until a matching h-card is found the author stays the
                    // bare url. That fallback is cached as well, so the same
                    // url is fetched at most once per call.
                    $resolved = $author_url;
                    // Kept in its own variable so the parsed document in
                    // $mf is not overwritten.
                    $fetched = \Mf2\fetch($author_url);
                    if ($fetched) {
                        foreach ($fetched['items'] ?? [] as $hcard) {
                            // Only interested in an h-card by itself in this case.
                            if (!$has_type($hcard, 'h-card')) {
                                continue;
                            }
                            // It must have a url property matching what we fetched.
                            $hcard_urls = $hcard['properties']['url'] ?? null;
                            if (!is_array($hcard_urls) ||
                                !in_array($author_url, $hcard_urls, true)) {
                                continue;
                            }
                            // Save parse_hcard the trouble of finding the correct url.
                            $hcard['properties']['url'][0] = $author_url;
                            $resolved = $this->parse_hcard($hcard);
                            break;
                        }
                    }
                    // Cache the result for the next h-entry to check.
                    $author_cache[$author_url] = $resolved;
                }
                $author = $author_cache[$author_url];
            }
            $item['author'] = [['data' => $author]];
        }

        if (isset($props['photo'][0])) {
            // If a photo is also in content, don't need to add it again here.
            $content = '';
            if (isset($props['content'][0]['html'])) {
                $content = $props['content'][0]['html'];
            }
            $photo_list = [];
            foreach ($props['photo'] as $photo) {
                // A photo may be a plain url or an array holding the url
                // under 'value'; strpos() needs the string.
                $photo = $unwrap($photo);
                if (is_string($photo) && !empty($photo) && strpos($content, $photo) === false) {
                    $photo_list[] = $photo;
                }
            }
            // When there's more than one photo show the first and use a lightbox.
            // Need a permanent, unique name for the image set, but don't have
            // anything unique except for the content itself, so use that.
            // NOTE(review): $image_set_id and $hidden are computed below but
            // are not part of the strings appended here - confirm intended.
            $count = count($photo_list);
            if ($count > 1) {
                $image_set_id = preg_replace('/[[:^alnum:]]/', '', $photo_list[0]);
                $description = '';
                for ($j = 0; $j < $count; $j++) {
                    $hidden = $j === 0 ? '' : 'class="hidden" ';
                    $description .= ''.
                        '
';
                }
                $description .= '
'.$count.' photos
';
            } elseif ($count == 1) {
                $description = '
';
            }
        }

        // NOTE(review): the description (including any photo text built
        // above) is only stored when the entry has html content - confirm
        // that photo-only entries are meant to have no description.
        if (isset($props['content'][0]['html'])) {
            // e-content['value'] is the same as p-name when they are on the same
            // element. Use this to replace title with a strip_tags version so
            // that alt text from images is not included in the title.
            $content_value = $props['content'][0]['value'] ?? null;
            if ($content_value === $title) {
                $title = strip_tags($props['content'][0]['html']);
                $item['title'] = [['data' => $title]];
            }
            $description .= $props['content'][0]['html'];
            if (isset($props['in-reply-to'][0])) {
                $reply = $props['in-reply-to'][0];
                $in_reply_to = '';
                if (is_string($reply)) {
                    $in_reply_to = $reply;
                } elseif (isset($reply['value'])) {
                    $in_reply_to = $reply['value'];
                }
                if ($in_reply_to !== '') {
                    $description .= ' '.
                        ''.$in_reply_to.'
';
                }
            }
            $item['description'] = [['data' => $description]];
        }

        if (isset($props['category'])) {
            $category_csv = '';
            // Categories can also contain h-cards.
            foreach ($props['category'] as $category) {
                if ($category_csv !== '') {
                    $category_csv .= ', ';
                }
                if (is_string($category)) {
                    // Can't have commas in categories.
                    $category_csv .= str_replace(',', '', $category);
                } else {
                    $category_csv .= $this->parse_hcard($category, true);
                }
            }
            $item['category'] = [['data' => $category_csv]];
        }

        if (isset($props['published'][0])) {
            $published = $props['published'][0];
            $timestamp = is_string($published) ? strtotime($published) : false;
            // Skip dates strtotime() cannot read rather than formatting a
            // bogus timestamp.
            if ($timestamp !== false) {
                // The string is labelled GMT, so format the timestamp in
                // GMT rather than in the default timezone.
                $pub_date = gmdate('F j Y g:ia', $timestamp).' GMT';
                $item['pubDate'] = [['data' => $pub_date]];
            }
        }

        // The title and description are set to the empty string to represent
        // a deleted item (which also makes it an invalid rss item).
        if (isset($props['deleted'][0])) {
            $item['title'] = [['data' => '']];
            $item['description'] = [['data' => '']];
        }
        $items[] = ['child' => ['' => $item]];
    }

    // Mimic RSS data format when storing microformats.
    $link = [['data' => $url]];
    $image = '';
    if (!is_string($feed_author) &&
        isset($feed_author['properties']['photo'][0])) {
        $image = [['child' => ['' => ['url' =>
            [['data' => $feed_author['properties']['photo'][0]]]]]]];
    }
    // Use the name given for the h-feed, or get the title from the html.
    if ($feed_title !== '') {
        $feed_title = [['data' => htmlspecialchars($feed_title)]];
    } elseif ($position = strpos($data, '
')) {
        // Only look at a window of up to 400 bytes around the match.
        $start = $position < 200 ? 0 : $position - 200;
        $check = substr($data, $start, 400);
        $matches = [];
        if (preg_match('/(.+)<\/title>/', $check, $matches)) {
            $feed_title = [['data' => htmlspecialchars($matches[1])]];
        }
    }
    $channel = ['channel' => [['child' => ['' =>
        ['link' => $link, 'image' => $image, 'title' => $feed_title,
         'item' => $items]]]]];
    $rss = [['attribs' => ['' => ['version' => '2.0']],
             'child' => ['' => $channel]]];
    $this->data = ['child' => ['' => ['rss' => $rss]]];
    return true;
}
/**
 * Adjust the DOCTYPE at the start of a document.
 *
 * The first statement removes whatever its pattern matches at the very
 * start of $data and keeps $data unchanged when preg_replace() returns
 * null (the `?? $data` fallback).
 *
 * NOTE(review): the second statement's pattern literal is not terminated
 * before the trailing ' ]>' as the code reads here, and no return statement
 * is visible - confirm this method against the file on disk before relying
 * on this description.
 *
 * @param string $data Document text.
 */
private static function set_doctype(string $data): string
{
// Strip DOCTYPE except if containing an [internal subset]
$data = preg_replace('/^\\s*\\[\\]]*>\s*/', '', $data) ?? $data;
// Declare HTML entities only if no remaining DOCTYPE
$doctype = preg_match('/^\\s* ]>';
}
}
// Make the namespaced class SimplePie\Parser also available under the
// non-namespaced name SimplePie_Parser.
class_alias('SimplePie\Parser', 'SimplePie_Parser');