This repository has been archived on 2023-07-22. You can view files and clone it, but cannot push or open issues or pull requests.
nord-for-org/web/dart/parse_sitemap.dart

49 lines
1.4 KiB
Dart
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import 'dart:html' show HttpRequest;
import 'package:html/parser.dart' show parse;
import 'package:html/dom.dart' show Element;
final excluded_keywords = ['index', 'CONTRIBUTING', 'LICENSE', 'README'];
// Get the sitemap content
Future<String> getSitemap() async {
const path = 'sitemap.html';
try {
return await HttpRequest.getString(path);
} catch (e) {
print('Couldnt open $path');
}
return 'Error';
}
// Parse the list of elements and detect pages from this list
Map<String, String> detectPages(List<Element> sitemap, [String prefix]) {
final links = <String, String>{};
for (var elem in sitemap) {
for(var kw in excluded_keywords) {
if (elem.outerHtml.contains(kw)) {
continue;
}
}
if (elem.innerHtml.startsWith('<a')) {
elem = elem.firstChild;
final url = elem.attributes['href'];
final text = elem.firstChild.text;
links[url] = (prefix == null) ? text : '$text ($prefix)';
} else {
final prefix = elem.firstChild.text;
final ul = elem.children[0].children;
links.addAll(detectPages(ul, prefix));
}
}
return links;
}
// This function returns a Map which contains all links to languages detected
// from the sitemap.
Future<Map<String, String>> parseSitemap() async {
final content = await getSitemap();
final sitemap = parse(content).getElementsByClassName('org-ul')[0].children;
return detectPages(sitemap);
}