This repository has been archived on 2023-02-26. You can view files and clone it, but cannot push or open issues or pull requests.
langue-phundrak-com/web/dart/parse_sitemap.dart

44 lines
1.3 KiB
Dart
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import 'dart:html' show HttpRequest;
import 'package:html/parser.dart' show parse;
import 'package:html/dom.dart' show Element;
// Get the sitemap content
Future<String> getSitemap() async {
const path = 'sitemap.html';
try {
return await HttpRequest.getString(path);
} catch (e) {
print('Couldnt open $path');
}
return 'Error';
}
// Parse the list of elements and detect pages from this list
Map<String, String> detectPages(List<Element> sitemap, [String prefix]) {
final links = <String, String>{};
for (var elem in sitemap) {
if (elem.outerHtml.contains('index')) {
continue;
} else if (elem.innerHtml.startsWith('<a')) {
elem = elem.firstChild;
final url = elem.attributes['href'];
final text = elem.firstChild.text;
links[url] = (prefix == null) ? text : '$text ($prefix)';
} else {
final prefix = elem.firstChild.text;
final ul = elem.children[0].children;
links.addAll(detectPages(ul, prefix));
}
}
return links;
}
// This function returns a Map which contains all links to languages detected
// from the sitemap.
Future<Map<String, String>> parseSitemap() async {
final content = await getSitemap();
final sitemap = parse(content).getElementsByClassName('org-ul')[0].children;
return detectPages(sitemap);
}