v0.4.6 (#14)
jonaprieto authored Dec 2, 2024
1 parent d8de19c commit 4bea8e6
Showing 8 changed files with 273 additions and 183 deletions.
36 changes: 18 additions & 18 deletions mkdocs_juvix/common/preprocesors/links.py
@@ -5,7 +5,7 @@
 from urllib.parse import urljoin

 import numpy as np  # type: ignore
-from colorama import Fore, Style
+from colorama import Fore, Style  # type: ignore
 from fuzzywuzzy import fuzz  # type: ignore
 from markdown.preprocessors import Preprocessor  # type: ignore
 from ncls import NCLS  # type: ignore
@@ -101,10 +101,9 @@ def process_wikilink(config, full_text, match, md_filepath) -> Optional[WikiLink
     )

     link_page = link.page
-    # print white space with "X"

-    if len(config["url_for"].get(link_page, [])) > 1 and link_page in config["url_for"]:
-        possible_pages = config["url_for"][link_page]
+    if len(config.get("url_for", {}).get(link_page, [])) > 1 and link_page in config.get("url_for", {}):
+        possible_pages = config.get("url_for", {}).get(link_page, [])
         hint = link.hint if link.hint else ""
         token = hint + link_page
         coefficients = {p: fuzz.WRatio(fun_normalise(p), token) for p in possible_pages}
@@ -121,8 +120,8 @@ def process_wikilink(config, full_text, match, md_filepath) -> Optional[WikiLink
                 Our choice: {link.html_path}"""
         )

-    elif link_page in config["url_for"]:
-        link.html_path = config["url_for"].get(link_page, [""])[0]
+    elif link_page in config.get("url_for", {}):
+        link.html_path = config.get("url_for", {}).get(link_page, [""])[0]
         log.debug(f"Single page found. html_path: {link.html_path}")
     else:
         log.debug("Link page not in config['url_for']")
@@ -135,11 +134,11 @@ def process_wikilink(config, full_text, match, md_filepath) -> Optional[WikiLink

     # Update links_found TODO: move this to the model
     try:
-        url_page = config["url_for"][link_page][0]
-        if url_page in config["nodes"]:
-            actuallink = config["nodes"][url_page]
+        url_page = config.get("url_for", {}).get(link_page, [""])[0]
+        if url_page in config.get("nodes", {}):
+            actuallink = config.get("nodes", {}).get(url_page, {})
             if actuallink:
-                pageName = actuallink["page"].get("names", [""])[0]
+                pageName = actuallink.get("page", {}).get("names", [""])[0]
                 html_path: str = link.html_path if link.html_path else ""
                 config.get("links_found", []).append(
                     {
@@ -192,7 +191,8 @@ def _run(self, content: str) -> str:

         # Find all code blocks, HTML comments, and script tags in a single pass
         ignore_blocks = re.compile(
-            r"((`{1,3})(?:[\s\S]*?)(\2)|<!--[\s\S]*?-->|<script>[\s\S]*?</script>)",
+            # r"((`{1,3})(?:[\s\S]*?)(\2)|<!--[\s\S]*?-->|<script>[\s\S]*?</script>)",
+            r"((`{1,3})(?:[\s\S]*?)(\2))",
             re.DOTALL,
         )
         intervals = []
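Note: the narrowed pattern only skips backtick code spans and fences; HTML comments and `<script>` blocks are no longer excluded (the comment above the `re.compile` is now stale), so wikilinks inside them will be processed. A quick sketch of the difference, on an illustrative input:

```python
import re

old = re.compile(r"((`{1,3})(?:[\s\S]*?)(\2)|<!--[\s\S]*?-->|<script>[\s\S]*?</script>)", re.DOTALL)
new = re.compile(r"((`{1,3})(?:[\s\S]*?)(\2))", re.DOTALL)

text = "a `[[in code]]` b <!-- [[in comment]] --> c"
print(len(old.findall(text)))  # 2: the code span and the HTML comment
print(len(new.findall(text)))  # 1: only the code span
```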
@@ -205,10 +205,12 @@ def _run(self, content: str) -> str:
         except Exception as e:
             log.error(f"Error occurred while processing ignore patterns: {str(e)}")
             return content
-        intervals_where_not_to_look = None
-        if intervals:
-            starts, ends, ids = map(np.array, zip(*intervals))
-            intervals_where_not_to_look = NCLS(starts, ends, ids)

+        # Review this for later improvements
+        # intervals_where_not_to_look = None
+        # if intervals:
+        #     starts, ends, ids = map(np.array, zip(*intervals))
+        #     intervals_where_not_to_look = NCLS(starts, ends, ids)
+
         # Find all wikilinks
         str_wikilinks = list(WIKILINK_PATTERN.finditer(content))
@@ -218,9 +220,7 @@ def _run(self, content: str) -> str:
             start, end = m.start(), m.end()

             # TODO: review this
-            if intervals_where_not_to_look and not list(
-                intervals_where_not_to_look.find_overlap(start, end)
-            ):
+            if True:
                 log.debug(
                     f"{Fore.YELLOW}Processing wikilink: {m.group(0)}{Style.RESET_ALL}"
                 )
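Note on the last two hunks: the NCLS interval tree used to let `_run` skip wikilink matches whose spans fall inside ignored regions; with the tree construction commented out and the guard collapsed to `if True:`, every match is now processed regardless of position. The old guard was also falsy whenever no ignore regions existed (the tree stayed `None`), so no wikilinks were processed at all in that case, which may have motivated the change. A small sketch of the disabled check, with illustrative intervals:

```python
import numpy as np  # type: ignore
from ncls import NCLS  # type: ignore

# Ignored regions as (start, end, id) triples, e.g. spans of code blocks.
intervals = [(0, 10, 0), (40, 60, 1)]
starts, ends, ids = map(np.array, zip(*intervals))
intervals_where_not_to_look = NCLS(starts, ends, ids)

# A match spanning 5..8 overlaps an ignored region and would have been skipped.
print(list(intervals_where_not_to_look.find_overlap(5, 8)))    # non-empty: skip
print(list(intervals_where_not_to_look.find_overlap(20, 30)))  # empty: process
```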
9 changes: 7 additions & 2 deletions mkdocs_juvix/env.py
@@ -14,14 +14,13 @@

 from colorama import Fore, Style  # type: ignore
 from mkdocs.config.defaults import MkDocsConfig
-from mkdocs.plugins import get_plugin_logger
 from semver import Version

 import mkdocs_juvix.utils as utils
 from mkdocs_juvix.juvix_version import MIN_JUVIX_VERSION
 from mkdocs_juvix.utils import is_juvix_markdown_file

-log = get_plugin_logger(f"{Fore.BLUE}[juvix_mkdocs] (env) {Style.RESET_ALL}")
+from mkdocs_juvix.logger import log

 BASE_PATH = Path(__file__).parent
 FIXTURES_PATH = BASE_PATH / "fixtures"
@@ -182,6 +181,12 @@ def __init__(self, config: Optional[MkDocsConfig] = None):
             )
             exit(1)

+        if not self.CACHE_ABSPATH.exists():
+            log.info(
+                f"{Fore.YELLOW}Creating cache directory {self.CACHE_ABSPATH}{Style.RESET_ALL}"
+            )
+            self.CACHE_ABSPATH.mkdir(parents=True, exist_ok=True)
+
         if (
             self.CACHE_ABSPATH.exists()
             and self.REMOVE_CACHE
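Note on the hunk above: `Path.mkdir(parents=True, exist_ok=True)` is already idempotent, so the `exists()` check only gates the log message; after it runs, the `self.CACHE_ABSPATH.exists()` test in the following context is always true. A standalone sketch of the pattern, with an illustrative directory name rather than the plugin's actual default:

```python
from pathlib import Path

cache_dir = Path(".cache-juvix-mkdocs")  # illustrative path, not the plugin's default
if not cache_dir.exists():
    print(f"Creating cache directory {cache_dir}")
cache_dir.mkdir(parents=True, exist_ok=True)  # safe even if the directory exists
```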
75 changes: 41 additions & 34 deletions mkdocs_juvix/links.py
@@ -4,13 +4,12 @@

 import json
 import re
-from concurrent.futures import ThreadPoolExecutor
 from os import getenv
 from pathlib import Path
 from typing import Dict, List, Optional
 from urllib.parse import urljoin

 from colorama import Fore, Style  # type: ignore
+from tqdm import tqdm as sync_tqdm  # type: ignore
 from markdown.extensions import Extension  # type: ignore
 from mkdocs.config.defaults import MkDocsConfig
 from mkdocs.structure.files import File, Files
@@ -21,7 +20,7 @@
 from mkdocs_juvix.common.preprocesors.links import WLPreprocessor
 from mkdocs_juvix.common.utils import fix_site_url, get_page_title
 from mkdocs_juvix.env import ENV
-from mkdocs_juvix.logger import log
+from mkdocs_juvix.logger import clear_line, clear_screen, log

 files_relation: List[ResultEntry] = []
 EXCLUDED_DIRS = [
@@ -57,7 +56,6 @@ def extendMarkdown(self, md):  # noqa: N802

 TOKEN_LIST_WIKILINKS: str = "<!-- list_wikilinks -->"

-
 class WikilinksPlugin:
     env: Optional[ENV] = None

@@ -100,35 +98,40 @@ def on_pre_build(self, config: MkDocsConfig) -> None:
config["wikilinks_issues"] = 0
config["nodes"] = {}
node_index = 0

for _url, page in _extract_aliases_from_nav(config["nav"]):
url = urljoin(config["site_url"], _url)

config["aliases_for"][url] = [page]
config["url_for"].setdefault(page, [])
config["url_for"][page].append(url)

# Create a new entry if the URL is not already present in config["nodes"]
if url not in config["nodes"]:
config["nodes"][url] = {
"index": node_index,
"page": {"names": [], "path": _url.replace("./", "")},
}
# Append the page to the "names" list
config["nodes"][url]["page"]["names"].append(page)
node_index += 1
nav_items = list(_extract_aliases_from_nav(config["nav"]))

with sync_tqdm(total=len(nav_items), desc="> processing nav items") as pbar:
for _url, page in nav_items:
url = urljoin(config["site_url"], _url)
config["aliases_for"][url] = [page]
config["url_for"].setdefault(page, [])
config["url_for"][page].append(url)

# Create a new entry if the URL is not already present in config["nodes"]
if url not in config["nodes"]:
config["nodes"][url] = {
"index": node_index,
"page": {"names": [], "path": _url.replace("./", "")},
}
# Append the page to the "names" list
config["nodes"][url]["page"]["names"].append(page)
node_index += 1
pbar.update(1)
clear_line()

if self.NODES_JSON.exists():
self.NODES_JSON.unlink()

with open(self.NODES_JSON, "w") as f:
json.dump(
try:
with open(self.NODES_JSON, "w") as f:
json.dump(
{
"nodes": config.get("nodes", {}),
},
f,
indent=2,
)
except Exception as e:
log.error(f"Error writing nodes.json: {e}")
config["current_page"] = None # current page being processed
return

@@ -161,18 +164,17 @@ def process_file(file: File) -> None:
                 _title = _title.strip()
                 _title = re.sub(r'^[\'"`]|["\'`]$', "", _title)

-                if _title not in config["url_for"]:
-                    url = urljoin(config["site_url"], file.url)
+                if _title not in config.get("url_for", {}):
+                    url = urljoin(config.get("site_url", ""), file.url)
                     config["url_for"][_title] = [url]
                     config["aliases_for"][url] = [_title]

-        with ThreadPoolExecutor() as executor:
-            list(
-                executor.map(
-                    process_file, filter(lambda f: f.is_documentation_page(), files)
-                )
-            )
-            executor.shutdown(wait=True)
+        clear_screen()
+        with sync_tqdm(total=len(files), desc="> processing files") as pbar:
+            for file in files:
+                if file.is_documentation_page():
+                    process_file(file)
+                    pbar.update(1)
+        clear_line()

         if self.LINKS_JSON.exists():
             self.LINKS_JSON.unlink()
Expand Down Expand Up @@ -201,16 +203,20 @@ def on_page_content(
         frontmatter has the `list_wikilinks` flag set to true.
         """
         if "current_page" not in config or "nodes" not in config:
+            log.debug("No current_page or nodes in config")
             return html
         current_page = config["current_page"]
         url = current_page.canonical_url.replace(".html", ".md")
         if url not in config["nodes"]:
+            log.debug(f"URL {url} not found in nodes. It's probably ignored because it's not in the mkdocs.yml file.")
             return html

         if url not in config["nodes"] or "index" not in config["nodes"][url]:
+            log.debug(f"URL {url} not found in nodes or no index for URL")
             return html
         links_number: List[Dict[str, int]] = config.get("links_number", [])
         if len(links_number) > 0:
+            log.debug(f"Processing {len(links_number)} links for {url}")
             actualindex = config["nodes"][url]["index"]
             result_entry = ResultEntry(
                 file=current_page.url,
@@ -222,6 +228,7 @@
             files_relation.append(result_entry)

         if page.meta.get("list_wikilinks", False):
+            log.debug(f"Generating wikilinks list for {url}")
             # Creat a bullet list of links
             wrapped_links = "<details class='quote'><summary>Relevant internal links on this page</summary><ul>"
             unique_links = {
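Note on this file's changes: the `ThreadPoolExecutor` fan-out over documentation pages is replaced by a sequential loop under tqdm, trading parallelism for a visible progress bar and deterministic ordering. A minimal sketch of the pattern with placeholder file names and helpers; note that if `pbar.update(1)` runs only for documentation pages while `total=len(files)` counts every file, as the diff suggests, the bar can finish short of 100%:

```python
from tqdm import tqdm as sync_tqdm  # type: ignore

files = ["index.md", "guide.md", "logo.png"]  # placeholder mixed file list

def is_documentation_page(f: str) -> bool:
    # Stand-in for mkdocs' File.is_documentation_page()
    return f.endswith(".md")

def process_file(f: str) -> None:
    ...  # per-file work goes here

with sync_tqdm(total=len(files), desc="> processing files") as pbar:
    for f in files:
        if is_documentation_page(f):
            process_file(f)
            pbar.update(1)
```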
93 changes: 34 additions & 59 deletions mkdocs_juvix/logger.py
@@ -1,68 +1,43 @@
+import os
 import logging
-from typing import Any, MutableMapping

 from colorama import Fore, Style  # type: ignore


-class PrefixedLogger(logging.LoggerAdapter):
-    """A logger adapter to prefix log messages."""
-
-    def __init__(self, prefix: str, logger: logging.Logger) -> None:
-        """
-        Initialize the logger adapter.
-        Arguments:
-            prefix: The string to insert in front of every message.
-            logger: The logger instance.
-        """
-        super().__init__(logger, {})
-        self.prefix = prefix
-
-    def process(self, msg: str, kwargs: MutableMapping[str, Any]) -> tuple[str, Any]:
-        """
-        Process the message.
-        Arguments:
-            msg: The message:
-            kwargs: Remaining arguments.
-        Returns:
-            The processed message.
-        """
-        return f"{self.prefix}: {msg}", kwargs
-
-
-def get_plugin_logger(name: str) -> PrefixedLogger:
-    """
-    Return a logger for plugins.
-    Arguments:
-        name: The name to use with `logging.getLogger`.
-    Returns:
-        A logger configured to work well in MkDocs,
-        prefixing each message with the plugin package name.
-    Example:
-        ```python
-        from mkdocs.plugins import get_plugin_logger
-        log = get_plugin_logger(__name__)
-        log.info("My plugin message")
-        ```
-    """
+DEBUG = os.getenv("DEBUG", "false").lower() == "true"
+print(f"{Fore.GREEN}DEBUG: {DEBUG}")

+class Logger(logging.Logger):
+    def __init__(self, logger: logging.Logger):
+        self.logger = logger
+        super().__init__(logger.name, logger.level)

+    def info(self, msg, *args, **kwargs):
+        if DEBUG:
+            self.debug(msg, *args, **kwargs)
+        else:
+            super().info(msg, *args, **kwargs)

+    def debug(self, msg, *args, **kwargs):
+        if DEBUG:
+            print("-"*100)
+            print(msg, *args, **kwargs)
+            clear_line(2)
+        else:
+            super().debug(msg, *args, **kwargs)

+def get_plugin_logger(name: str) -> Logger:
     logger = logging.getLogger(f"mkdocs.plugins.{name}")
-    setattr(logger, "info", lambda msg: clear_screen() and getattr(logger, "info")(msg))
-    return PrefixedLogger(name.split(".", 1)[0], logger)

+    setattr(logger, "info", lambda msg: getattr(logger, "info")(msg))
+    return Logger(logger)

 log = get_plugin_logger(f"{Fore.BLUE}juvix_mkdocs{Style.RESET_ALL}")


 def clear_screen():
-    print("\033[H\033[J", end="", flush=True)
-
-
-def clear_line():
-    print("\033[A", end="", flush=True)
-    print("\033[K", end="\r", flush=True)
+    if os.getenv("DEBUG", "false").lower() != "true":
+        print("\033[H\033[J", end="", flush=True)

+def clear_line(n=1):
+    if os.getenv("DEBUG", "false").lower() != "true":
+        for _ in range(n):
+            print("\033[A", end="", flush=True)
+            print("\033[K", end="\r", flush=True)