diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5e2ba079322..692194cd462 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -179,6 +179,11 @@ jobs: run: | #*.po and documentation.pot are modifyed by build. Ignore them for now. .github/scripts/verify-clean-repo.sh ':(exclude)docs/po/*.po' ':(exclude)docs/po/documentation.pot' + - name: HTML checks + run: | + set -x + #-w sets warn only, remove to generate a CI failure on error + scripts/htmlcheck.sh -w - name: Tar linuxcnc-doc run: | set -x diff --git a/docs/src/Submakefile b/docs/src/Submakefile index 1e6cc85b64f..547b389290e 100644 --- a/docs/src/Submakefile +++ b/docs/src/Submakefile @@ -1,11 +1,8 @@ -.PHONY: docs docclean checkref +.PHONY: docs docclean .PHONY: pdfdocs htmldocs install-doc install-doc-pdf install-doc-html SHELL=/bin/bash -# To make linuxcnc-checklink widely available -export PATH:=$(BASEPWD)/../scripts:$(PATH) - # Ruby (asciidoctor / asciidoctor-pdf) reads source files in the locale's # default external encoding. Containerised builds often inherit POSIX/C, # which makes Ruby treat every UTF-8 byte > 0x7f as an invalid sequence @@ -573,7 +570,7 @@ $(DOC_OUT_HTML)/pdf/index.html: $(PDF_TARGETS) ../scripts/make-docs-pdf-index @mkdir -p $(dir $@) $(Q)../scripts/make-docs-pdf-index -htmldocs: svgs_made_from_dots .htmldoc-stamp checkref_en +htmldocs: svgs_made_from_dots .htmldoc-stamp # When translations are enabled, the .adoc files in $(L)/ are produced by # the translateddocs target (po4a). Teach make how to ask for them: the @@ -590,8 +587,8 @@ endif # Depend on the stamp files, not the phony copy_asciidoc_files / # gen_complist aliases: a phony prereq is always "newer", so naming them -# re-touched .htmldoc-stamp every run, dragging checkref and the css copy -# with it. The stamps fire only when their real inputs change. +# re-touched .htmldoc-stamp every run, dragging the css copy +# with it. The stamps fire only when their real inputs change. 
.htmldoc-stamp: .copy-asciidoc-stamp $(DOC_DIR)/.gen_complist-stamp $(HTML_TARGETS) .images-stamp .include-stamp $(DOC_OUT_HTML)/asciidoctor.css $(DOC_OUT_HTML)/rouge-github.css .lang-switcher-stamp touch $@ @@ -681,43 +678,16 @@ $(DOC_OUT_HTML)/rouge-github.css: $(DOC_SRCDIR)/render-rouge-css.rb # gen_complist.py here after MAN_HTML_TARGETS would rewrite the file # with HTML-existence-dependent content (different miss_in_man set), # bumping mtime past .pot and re-triggering po4a on the next build. -# Broken-link validation against generated HTML is checkref's job. # MAN_HTML_TARGETS is order-only: this stamp only needs the manpages built, # it does not read their content, and the post-processor rewrites them in # place later -- a normal prerequisite would then re-fire this stamp (and the -# .htmldoc-stamp / checkref that depend on it) on every subsequent make. +# .htmldoc-stamp that depends on it) on every subsequent make. $(DOC_DIR)/.gen_complist-stamp: $(DOC_OUT_ADOC)/en/hal/components_gen.adoc | $(MAN_HTML_TARGETS) mkdir -p $(DOC_OUT_HTML)/en/hal @touch $@ gen_complist: $(DOC_DIR)/.gen_complist-stamp - -CHECKREF_TARGETS := checkref_en $(foreach L,$(LANGUAGES),checkref_$(L)) -.PHONY: $(CHECKREF_TARGETS) -checkref: $(CHECKREF_TARGETS) - -# checkref_* stay phony aliases; the link check lives in stamp-gated rules -# so it only re-runs when the HTML changes. .htmldoc-stamp is filtered out -# of the args so the stamp file is not handed to the link checker. -checkref_en: $(DOC_DIR)/.checkref-english-stamp -# --warn-on-failure: link checking has been a silent no-op for a long time -# (w3c-linkchecker disables file:// URIs), so the tree may carry accumulated -# broken links. Report them without breaking the build for now; drop this -# flag once the backlog is cleared so regressions fail the build again. 
-$(DOC_DIR)/.checkref-english-stamp: $(DOC_TARGETS_HTML_EN) $(DOC_OUT_HTML)/en/index.html $(DOC_OUT_HTML)/en/gcode.html .htmldoc-stamp - $(DOC_SRCDIR)/checkref --warn-on-failure English $(filter %.html,$^) - @touch $@ - -# Pattern rule for all languages. Per-lang gcode.html is po4a-translated -# from src/gcode.html and lands as a sibling to each lang's index.html. -checkref_%: $(DOC_DIR)/.checkref-%-stamp ; -$(DOC_DIR)/.checkref-%-stamp: $$(DOC_TARGETS_HTML_$$(call uc,$$*)) \ - $$(DOC_OUT_HTML)/%/gcode.html .htmldoc-stamp - $(DOC_SRCDIR)/checkref --warn-on-failure $(call lang_name,$*) $(filter %.html,$$^) - @touch $@ - - MAN_SRCS_NOSO = $(patsubst $(DOC_MAN)/%,%, \ $(shell grep -s -L '^\.so ' $(MAN_SRCS))) @@ -1324,7 +1294,6 @@ docclean: -rm -f .include-stamp -rm -f $(DOC_DIR)/.translateddocs-stamp -rm -f $(DOC_DIR)/.gen_complist-stamp - -rm -f $(DOC_DIR)/.checkref-*-stamp -rm -f $(OTHER_DOTFILES:.dot=.svg) diff --git a/docs/src/checklinks.py b/docs/src/checklinks.py deleted file mode 100644 index 7bb0f7f3e26..00000000000 --- a/docs/src/checklinks.py +++ /dev/null @@ -1,127 +0,0 @@ -import os, sys, sgmllib, cookielib, urllib, htmlentitydefs - -if len(sys.argv) > 1: - ref = sys.argv[1] -else: - ref = "../html/gcode.html" - -if len(sys.argv) > 2: - targets = sys.argv[2:] -else: - targets = None - -def get(attr, attrs, default=""): - attr = attr.lower() - for k, v in attrs: - if k.lower() == attr: return v - return default - -class MetaHandler: - def do_meta(self, attrs): - equiv = get("http-equiv", attrs) - content = get("content", attrs) - if equiv != "content-type": return - attrs = cookielib.split_header_words([content])[0] - encoding = get("charset", attrs) - if encoding == "ASCII": encoding = "ISO-8859-1" - if encoding: self.encoding = encoding - -class get_refs(sgmllib.SGMLParser, MetaHandler): - entitydefs = htmlentitydefs.entitydefs - - def __init__(self, verbose=0): - sgmllib.SGMLParser.__init__(self, verbose) - self.refs = set() - self.encoding = None - - 
def do_a(self, attrs): - href = get('href', attrs) - if self.encoding: - href = href.decode(self.encoding) - href = urllib.unquote(href) - self.refs.add(href) - -class get_anchors(sgmllib.SGMLParser, MetaHandler): - entitydefs = htmlentitydefs.entitydefs - - def __init__(self, verbose=0): - sgmllib.SGMLParser.__init__(self, verbose) - self.anchors = set() - self.encoding = None - - def unknown_starttag(self, tag, attrs): - id = get('id', attrs) - if id: - self.do_a([('name', id)]) - - def unknown_endtag(self, tag): pass - - def do_a(self, attrs): - name = get('name', attrs, get('id', attrs)) - if self.encoding: - name = name.decode(self.encoding) - name = urllib.unquote(name) - if name: - self.anchors.add(name) - -_anchors = {} -def get_anchors_cached(filename): - if filename not in _anchors: - a = get_anchors() - a.feed(open(filename).read()) - _anchors[filename] = a.anchors - return _anchors[filename] - -def resolve_file(src, target): - if "#" in target: - a, b = target.split("#", 1) - else: - a, b = target, None - - a = a or src - - return os.path.join(os.path.dirname(ref), a), b - -def resolve(target, anchor): - if not anchor: return True - - anchors = get_anchors_cached(target) - return anchor in anchors - -refs = get_refs() -refs.feed(open(ref).read()) -refs = refs.refs - -missing_anchor = set() -missing_file = set() -unlisted_targets = set() -good = set() -for r in refs: - target, anchor = resolve_file(ref, r) - if targets and not target in targets: - unlisted_targets.add(target) - elif not os.path.exists(target): - missing_file.add(r) - elif not resolve(target, anchor): - missing_anchor.add(r) - else: - good.add(r) - -if missing_file: - print("Files linked to in %s but could not be found:" % ( - os.path.basename(ref),)) - for i in sorted(missing_file): - print("\t%r" % i) -if missing_anchor: - print("Anchors used in %s but not defined in linked file:" % ( - os.path.basename(ref),)) - for i in sorted(missing_anchor): - print("\t%r" % i) -if unlisted_targets: 
- print("Links to files not listed as targets:") - for i in sorted(unlisted_targets): - print("\t%r" % i) - print("If all link targets are not listed in the Submakefile, then the results of this program is unreliable.") -print("Good links: %d/%d" % (len(good), len(refs))) -if missing_anchor or missing_file or unlisted_targets: - raise SystemExit, 1 diff --git a/docs/src/checkref b/docs/src/checkref deleted file mode 100755 index bdd62af0585..00000000000 --- a/docs/src/checkref +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash -#set -x - -WARN_ON_FAILURE=0 -if [ "$1" = "--warn-on-failure" ]; then - WARN_ON_FAILURE=1 - shift -fi - -LANGUAGE="$1" -shift - -if [ -z "$(which linuxcnc-checklink)" ]; then - echo "ERROR: checklink not found, install w3c-linkchecker for HTML link validation" 1>&2 - exit 1 -fi - - -BAD_LINKS=0 -CHECKED=0 -SKIPPED=0 -for F in "$@"; do - OUT=.checklink.$LANGUAGE.$(basename "$F").tmp - rm -f "$OUT" - # --follow-file-links is required: recent w3c-linkchecker refuses file:// - # URIs by default, so without it checklink never inspects the local file - # and validates nothing. - # - # Drop checklink's own "uninitialized value ... checklink line N" Perl - # noise (not link results). checklink stays first, so PIPESTATUS[0] holds. - linuxcnc-checklink --quiet --follow-file-links --exclude "(http|https|irc)://" "$F" 2>&1 \ - | grep -vE 'Use of uninitialized value .* at .*checklink line [0-9]+' | tee "$OUT" - STATUS=${PIPESTATUS[0]} - # Distinguish "checklink never inspected the file" from "checklink ran and - # found problems". It exits 64 when it reports broken links, so a nonzero - # exit is not itself a failure; only a shell exec failure (126/127) or an - # explicit refusal to read the document means nothing was validated. 
- if [ "$STATUS" -eq 126 ] || [ "$STATUS" -eq 127 ] || \ - grep -E -q "(Access to 'file' URIs has been disabled|checklink not configured)" "$OUT"; then - echo "*** warning: linuxcnc-checklink could not validate $(basename "$F"); skipped" 1>&2 - SKIPPED=$((SKIPPED + 1)) - rm -f "$OUT" - continue - fi - CHECKED=$((CHECKED + 1)) - if grep -E -q 'List of (broken links and other issues|duplicate and empty anchors)' "$OUT"; then - BAD_LINKS=1 - fi - rm -f "$OUT" -done - -if [ $BAD_LINKS -eq 1 ]; then - RET_VAL=1 - echo "***" 1>&2 - echo "*** warning: bad links found in $LANGUAGE docs!" 1>&2 - if [ $WARN_ON_FAILURE -eq 1 ]; then - echo "*** oh well, continuing anyway" 1>&2 - RET_VAL=0 - fi - echo "***" 1>&2 - exit $RET_VAL -elif [ $CHECKED -eq 0 ]; then - # Nothing validated: warn instead of claiming success, but do not fail - # the build (an unusable checklink is an environment problem). - echo "***" 1>&2 - echo "*** warning: link checking skipped for $LANGUAGE docs;" 1>&2 - echo "*** linuxcnc-checklink validated no files ($SKIPPED skipped)." 1>&2 - echo "*** install w3c-linkchecker and re-run ./configure to enable it." 1>&2 - echo "***" 1>&2 -else - echo "###" - echo "### language: $LANGUAGE" - if [ $SKIPPED -gt 0 ]; then - echo "### checked links are good ($SKIPPED file(s) skipped)!" - else - echo "### all links are good!" - fi - echo "###" -fi - diff --git a/scripts/htmlcheck.sh b/scripts/htmlcheck.sh new file mode 100755 index 00000000000..7b78c8b22a8 --- /dev/null +++ b/scripts/htmlcheck.sh @@ -0,0 +1,108 @@ +#!/bin/bash + +usage () { + P=${0##*/} + cat < /dev/null; then + echo "ERROR: checklink not found in PATH, install w3c-linkchecker for HTML link validation" 1>&2 + echo "Can be downloaded from https://github.com/w3c/link-checker if there is no package for your distribution." 1>&2 + echo "link-checker/bin/checklink is a perl script, nothing else is needed from the above repo." 
1>&2 + exit 1 +fi + +CHKOPT=( --follow-file-links ) +if [ "$VERBOSE" -eq 0 ]; then + CHKOPT+=( --quiet ) +fi +if [ "$EXTERNAL" -eq 0 ]; then + CHKOPT+=( --exclude "(http|https|irc)://" ) +fi + +#Note: grep is used to filter out an error message due to a bug in checklink in debian/ubuntu +#should be removed as soon as this package is fixed (upstream is already fine) + +warnval=0 +retval=0 +if [ $# -gt 0 ]; then + # Only process individual files if passed on the command line. + for f in "$@"; do + if [ -r "$f" ]; then + checklink "${CHKOPT[@]}" "$f" 2>&1 | grep -vE 'Use of uninitialized value .* at .*checklink line [0-9]+' + ret="${PIPESTATUS[0]}" + if [ "$ret" -ne 0 ]; then + echo "'$f': File check: Fail" + if [ $WARN != 0 ]; then + warnval="$ret" + else + retval="$ret" + fi + else + echo "'$f': File check: OK" + fi + else + echo "Cannot read file '$f'" + retval=1 + fi + done +else + #Otherwise, recursively check docs/build/html/index.html + f="$(dirname "$0")/../docs/build/html/index.html" + if [ -r "$f" ]; then + CHKOPT+=( --recursive ) + checklink "${CHKOPT[@]}" "$f" 2>&1 | grep -vE 'Use of uninitialized value .* at .*checklink line [0-9]+' + ret="${PIPESTATUS[0]}" + if [ "$ret" -ne 0 ]; then + echo "'$f': Recursive check: Fail" + if [ $WARN != 0 ]; then + warnval="$ret" + else + retval="$ret" + fi + else + echo "'$f': Recursive check: OK" + fi + else + echo "Cannot read file '$f', did you build the doc first?" 
+ retval=1 + fi +fi + +#Generate a CI warning or error if running in CI +#See: https://docs.github.com/en/actions/reference/workflows-and-actions/workflow-commands +if [ -n "${GITHUB_ACTIONS:-}" ]; then + if [ "$retval" != 0 ]; then + echo "::error title=HTML checks failed::checks failed" + elif [ "$warnval" != 0 ]; then + echo "::warning title=HTML checks failed::checks failed" + fi +fi + +exit "$retval" diff --git a/scripts/linuxcnc-checklink.in b/scripts/linuxcnc-checklink.in deleted file mode 100644 index 15e8bf0bead..00000000000 --- a/scripts/linuxcnc-checklink.in +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -# Fail clearly if configure left @CHECKLINK@ empty, rather than with the -# cryptic "exec: : Permission denied". -CHECKLINK="@CHECKLINK@" -exec "${CHECKLINK:?checklink not configured; install w3c-linkchecker and re-run ./configure}" "$@" \ No newline at end of file diff --git a/src/configure.ac b/src/configure.ac index 6ec5aa1717c..8249c972bcb 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -1215,16 +1215,6 @@ For a fully custom path, set the var directly: fi fi -# Programs required only for building the HTML documentation -if ( test "$BUILD_DOCS_HTML" = "yes" ) ; then - AC_PATH_PROG(CHECKLINK,checklink,"none") - if ( test "none" = "$CHECKLINK" ) ; then - AC_MSG_WARN([no checklink, HTML documentation cannot be built -install with "sudo apt-get install w3c-linkchecker"]) - BUILD_DOCS_HTML=no - fi -fi - AC_ARG_ENABLE(build-documentation-translation, AS_HELP_STRING( [--enable-build-documentation-translation], @@ -1761,7 +1751,6 @@ AC_CONFIG_FILES([../scripts/halcmd_twopass], [chmod +x ../scripts/halcmd_twopass AC_CONFIG_FILES([../scripts/realtime], [chmod +x ../scripts/realtime]) AC_CONFIG_FILES([../scripts/runtests], [chmod +x ../scripts/runtests]) AC_CONFIG_FILES([../scripts/linuxcnc_var], [chmod +x ../scripts/linuxcnc_var]) -AC_CONFIG_FILES([../scripts/linuxcnc-checklink], [chmod +x ../scripts/linuxcnc-checklink]) AC_CONFIG_FILES(Makefile.inc) 
AC_CONFIG_FILES(Makefile.modinc) AC_CONFIG_FILES(../tcl/linuxcnc.tcl)