Check links in CI with htmltest

Remove test_links.py
This commit is contained in:
hypevhs 2024-08-22 19:04:03 -05:00
parent 9e2c8fe2d5
commit 1f27801aad
4 changed files with 47 additions and 51 deletions

3
.gitignore vendored
View file

@ -7,3 +7,6 @@ node_modules/
public/
resources/
.deploy*/
# htmltest
tmp/

View file

@ -1,28 +1,43 @@
variables:
GIT_SUBMODULE_STRATEGY: recursive
test-links:
image: python:latest
script:
- python3 ./test_links.py
## GitLab defaults to:
# stages:
# - build
# - test
# - deploy
test-trailing-spaces:
script:
- ./test_trailing_spaces.sh
test-build:
image: registry.gitlab.com/pages/hugo/hugo_extended
script:
- hugo
except:
- main
pages:
build-site:
stage: build
image: registry.gitlab.com/pages/hugo/hugo_extended
script:
- hugo
artifacts:
paths:
- public
test-html:
stage: test
image:
name: wjdp/htmltest:latest
entrypoint: [""]
cache:
key: htmltest-cache
paths:
- tmp/
script:
- htmltest
test-trailing-spaces:
stage: test
needs: [] # ok to run before build-site
script:
- ./test_trailing_spaces.sh
# The GitLab Pages job must always be named "pages", and result in a "public" artifact.
# Our "public" artifact is created above, but GitLab jobs will fetch all artifacts from previous stages.
pages:
stage: deploy
script: ls -ld public
only:
- main

11
.htmltest.yml Normal file
View file

@ -0,0 +1,11 @@
# htmltest configuration. Option reference:
# https://github.com/wjdp/htmltest?tab=readme-ov-file#wrench-configuration
# Directory of generated HTML to check ("public" is the artifact path the CI build job publishes).
DirectoryPath: "public"
# NOTE(review): presumably makes plain http:// links count as failures — confirm against the htmltest docs.
EnforceHTTPS: true
# URLs excluded from checking; the commented-out entries are kept as syntax examples.
IgnoreURLs:
# - "example.com"
# - "^/misc/js/script.js$"
- "https://gitlab.com/lvra/lvra.gitlab.io/-/edit"
# IgnoreDirs:
# - "lib"
# Lifetime of cached link-check results; presumably stored under tmp/, which CI caches and .gitignore excludes.
CacheExpires: "6h"

View file

@ -1,33 +0,0 @@
#!/usr/bin/env python3
from pathlib import Path
import re
from typing import List
from os.path import exists
def get_all_markdown_files():
    """Lazily yield every ``*.md`` path below the ``content`` directory.

    The search is recursive and relative to the current working directory.
    """
    content_root = Path("content")
    return content_root.rglob("*.md")
# Matches the "](/target)" tail of a Markdown link whose target is a
# site-absolute path; the capture group is the path itself. "-" is part of
# the class so hyphenated paths are checked rather than silently skipped.
_INTERNAL_LINK_RE = re.compile(r"\]\((/[\w/-]+)\)")


def find_links(f: str) -> List[str]:
    """Return the site-absolute link targets found in a Markdown file.

    Only inline links of the form ``[text](/some/path)`` are reported.
    Targets containing characters outside word characters, ``/`` and ``-``
    (for example ``.`` or ``#``) are not matched, so external URLs, relative
    links, files with extensions and anchors are ignored.

    Args:
        f: Path of the Markdown file to scan; anything ``open`` accepts.

    Returns:
        The matched targets in order of appearance, each starting with ``/``.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale of the machine running the check.
    with open(f, "r", encoding="utf-8") as fd:
        content = fd.read()
    return _INTERNAL_LINK_RE.findall(content)
def verify_link_exists(link: str) -> bool:
    """Tell whether a site-absolute link maps to a Markdown source file.

    A link resolves when either ``content<link>/_index.md`` exists or,
    after dropping trailing slashes, ``content<link>.md`` exists. Paths are
    relative to the current working directory.
    """
    section_index = f"content{link}/_index.md"
    if exists(section_index):
        return True
    trimmed = link.rstrip("/")
    single_page = f"content{trimmed}.md"
    return exists(single_page)
def main() -> int:
    """Check every Markdown file under ``content`` for dead internal links.

    Each unresolved link is printed as ``E: <file>: <link> does not exist``.
    All files are scanned even after the first failure so that one run
    reports every broken link.

    Returns:
        0 when every link resolves, 1 when at least one does not.
    """
    status = 0
    for md_file in get_all_markdown_files():
        for link in find_links(md_file):
            if not verify_link_exists(link):
                print(f"E: {md_file}: {link} does not exist")
                status = 1
    return status


if __name__ == "__main__":
    # Raise SystemExit rather than calling the bare exit() helper: that name
    # is injected by the site module for interactive use and is missing
    # when Python runs with -S.
    raise SystemExit(main())