Fixed broken sitemap parsing

2020-05-26 02:13:48 +02:00 · 2020-05-26 02:13:48 +02:00 · 4ed2d6bdc4
commit 4ed2d6bdc4
parent 2334518ecc
1 changed file with 7 additions and 4 deletions
--- a/web/dart/parse_sitemap.dart
+++ b/web/dart/parse_sitemap.dart
@@ -7,7 +7,7 @@ final excluded_keywords = {'index', 'CONTRIBUTING', 'LICENSE', 'README'};

 // Get the sitemap content
 Future<String> fetchRemoteSitemap() async {
-  const path = 'sitemap.html';
+  const path = '/sitemap.html';
  try {
    return await html.HttpRequest.getString(path);
  } catch (e) {
@@ -19,14 +19,17 @@ Future<String> fetchRemoteSitemap() async {
 // Parse the list of elements and detect pages from this list
 Map<String, String> detectPages(List<dom.Element> t_sitemap,
    [String t_prefix]) {
-  final sitemap = t_sitemap.where((e) => !excluded_keywords.contains(e));
  final links = <String, String>{};
-  for (var elem in sitemap) {
+  for (var elem in t_sitemap) {
    if (elem.innerHtml.startsWith('<a')) {
      elem = elem.firstChild;
      final url = elem.attributes['href'];
      final text = elem.firstChild.text;
-      links[url] = (t_prefix == null) ? text : '$text ($t_prefix)';
+      if (excluded_keywords.contains(text) ||
+          excluded_keywords.contains(url.substring(0, url.length - 5))) {
+        continue;
+      }
+      links['/$url'] = (t_prefix == null) ? text : '$text ($t_prefix)';
    } else {
      t_prefix = (t_prefix == null)
          ? elem.firstChild.text.replaceAll('\n', '')