Added keywords to be excluded from page listing

This commit is contained in:
Lucien Cartier-Tilet 2020-05-05 16:48:09 +02:00
parent d86bb23c88
commit 004a333ffe
Signed by: phundrak
GPG Key ID: BD7789E705CB8DCA
1 changed file with 8 additions and 3 deletions

View File

@ -3,6 +3,8 @@ import 'dart:html' show HttpRequest;
import 'package:html/parser.dart' show parse;
import 'package:html/dom.dart' show Element;
// Keywords identifying sitemap entries that are intended to be excluded from
// the page listing. detectPages tests each element's outerHtml for these
// substrings.
// NOTE(review): the `continue` inside detectPages' keyword loop targets that
// inner loop, not the loop over sitemap elements, so a match does not skip the
// element by itself - confirm and use a labeled continue or `any` if needed.
final excluded_keywords = ['index', 'CONTRIBUTING', 'LICENSE', 'README'];
// Get the sitemap content
Future<String> getSitemap() async {
const path = 'sitemap.html';
@ -18,9 +20,12 @@ Future<String> getSitemap() async {
Map<String, String> detectPages(List<Element> sitemap, [String prefix]) {
final links = <String, String>{};
for (var elem in sitemap) {
if (elem.outerHtml.contains('index')) {
continue;
} else if (elem.innerHtml.startsWith('<a')) {
for(var kw in excluded_keywords) {
if (elem.outerHtml.contains(kw)) {
continue;
}
}
if (elem.innerHtml.startsWith('<a')) {
elem = elem.firstChild;
final url = elem.attributes['href'];
final text = elem.firstChild.text;