Fixed broken sitemap parsing

This commit is contained in:
Lucien Cartier-Tilet 2020-05-26 02:13:48 +02:00
parent 2334518ecc
commit 4ed2d6bdc4
Signed by: phundrak
GPG Key ID: BD7789E705CB8DCA
1 changed file with 7 additions and 4 deletions

View File

@@ -7,7 +7,7 @@ final excluded_keywords = {'index', 'CONTRIBUTING', 'LICENSE', 'README'};
// Get the sitemap content
Future<String> fetchRemoteSitemap() async {
const path = 'sitemap.html';
const path = '/sitemap.html';
try {
return await html.HttpRequest.getString(path);
} catch (e) {
@@ -19,14 +19,17 @@ Future<String> fetchRemoteSitemap() async {
// Parse the list of elements and detect pages from this list
Map<String, String> detectPages(List<dom.Element> t_sitemap,
[String t_prefix]) {
final sitemap = t_sitemap.where((e) => !excluded_keywords.contains(e));
final links = <String, String>{};
for (var elem in sitemap) {
for (var elem in t_sitemap) {
if (elem.innerHtml.startsWith('<a')) {
elem = elem.firstChild;
final url = elem.attributes['href'];
final text = elem.firstChild.text;
links[url] = (t_prefix == null) ? text : '$text ($t_prefix)';
if (excluded_keywords.contains(text) ||
excluded_keywords.contains(url.substring(0, url.length - 5))) {
continue;
}
links['/$url'] = (t_prefix == null) ? text : '$text ($t_prefix)';
} else {
t_prefix = (t_prefix == null)
? elem.firstChild.text.replaceAll('\n', '')