This repository has been archived on 2023-07-22. You can view files and clone it, but cannot push or open issues or pull requests.
nord-for-org/web/dart/parse_sitemap.dart

49 lines
1.4 KiB
Dart
Raw Normal View History

import 'dart:html' show HttpRequest;
import 'package:html/parser.dart' show parse;
import 'package:html/dom.dart' show Element;
final excluded_keywords = ['index', 'CONTRIBUTING', 'LICENSE', 'README'];
// Get the sitemap content
Future<String> getSitemap() async {
const path = 'sitemap.html';
try {
return await HttpRequest.getString(path);
} catch (e) {
print('Couldnt open $path');
}
return 'Error';
}
// Parse the list of elements and detect pages from this list
Map<String, String> detectPages(List<Element> sitemap, [String prefix]) {
final links = <String, String>{};
for (var elem in sitemap) {
for(var kw in excluded_keywords) {
if (elem.outerHtml.contains(kw)) {
continue;
}
}
if (elem.innerHtml.startsWith('<a')) {
elem = elem.firstChild;
final url = elem.attributes['href'];
final text = elem.firstChild.text;
links[url] = (prefix == null) ? text : '$text ($prefix)';
} else {
final prefix = elem.firstChild.text;
final ul = elem.children[0].children;
links.addAll(detectPages(ul, prefix));
}
}
return links;
}
// This function returns a Map which contains all links to languages detected
// from the sitemap.
Future<Map<String, String>> parseSitemap() async {
final content = await getSitemap();
final sitemap = parse(content).getElementsByClassName('org-ul')[0].children;
return detectPages(sitemap);
}