Improve parallelization performance with trio lib
jonaprieto committed Oct 23, 2024 · 1 parent 9c69ef6 · commit ab6fcde
Showing 7 changed files with 290 additions and 186 deletions.
mkdocs_juvix/common/preprocesors/links.py (37 changes: 15 additions & 22 deletions)
```diff
@@ -15,6 +15,7 @@
 
 from mkdocs_juvix.common.models import FileLoc, WikiLink
 from mkdocs_juvix.env import ENV
+from mkdocs_juvix.snippets import SnippetPreprocessor
 
 WIKILINK_PATTERN = re.compile(
     r"""
@@ -44,7 +45,7 @@ def __init__(self, mkconfig, snippet_preprocessor, env: Optional[ENV] = None):
         else:
             self.env = env
 
-        self.snippet_preprocessor = snippet_preprocessor
+        self.snippet_preprocessor: SnippetPreprocessor = snippet_preprocessor
         # remove the mkdocs_juvix.snippets plugin from the config
         if "mkdocs_juvix.snippets" in self.mkconfig.mdx_configs:
             self.mkconfig.mdx_configs.pop("mkdocs_juvix.snippets")
@@ -80,13 +81,14 @@ def run(self, lines) -> List[str]:
             return lines
 
         filepath = Path(current_page_url)
+        rel_to_docs = filepath.relative_to(self.env.DOCS_ABSPATH)
 
         try:
             cache_filepath: Optional[Path] = (
                 self.env.get_filepath_for_wikilinks_in_cache(filepath)
             )
         except Exception as e:
-            log.error(f"Error getting cache filepath for file {filepath}: {e}")
+            log.error(f"Error getting cache filepath for file {rel_to_docs}: {e}")
             return lines
 
         if original_filepath:
@@ -100,17 +102,11 @@ def run(self, lines) -> List[str]:
         ):
             return cache_filepath.read_text().split("\n")
 
-        if self.run_snippet_preprocessor:
-            time_start = time.time()
-            lines = self.snippet_preprocessor.run(lines)
-            time_end = time.time()
-            log.info(
-                f"Snippet finished in {Fore.GREEN}{(time_end - time_start):.5f}{Style.RESET_ALL} seconds"
-            )
+        time_start = time.time()
+
+        # if self.run_snippet_preprocessor:
+        #     lines = self.snippet_preprocessor.run(lines)
 
-        log.info(
-            f"Processing wikilinks on file {Fore.GREEN}{filepath}{Style.RESET_ALL}"
-        )
         # Combine all lines into a single string
         full_text = "\n".join(lines)
         # Find all code blocks, HTML comments, and script tags in a single pass
@@ -119,7 +115,6 @@
         )
 
         intervals = []
-        time_start = time.time()
         try:
             for match in ignore_blocks.finditer(full_text):
                 intervals.append((match.start(), match.end(), 1))
@@ -129,20 +124,20 @@
         except Exception as e:
             log.error(f"Error occurred while processing ignore patterns: {str(e)}")
             return lines
-
+        intervals_where_not_to_look = None
         if intervals:
             starts, ends, ids = map(np.array, zip(*intervals))
-            ignore_tree = NCLS(starts, ends, ids)
-        else:
-            ignore_tree = NCLS([], [], [])
+            intervals_where_not_to_look = NCLS(starts, ends, ids)
 
         # Find all wikilinks
         str_wikilinks = list(WIKILINK_PATTERN.finditer(full_text))
 
         replacements = []
         for m in str_wikilinks:
             start, end = m.start(), m.end()
-            if not list(ignore_tree.find_overlap(start, end)):
+            if intervals_where_not_to_look and not list(
+                intervals_where_not_to_look.find_overlap(start, end)
+            ):
                 link = self.process_wikilink(
                     self.mkconfig, full_text, m, current_page_url
                 )
@@ -156,13 +151,11 @@
         for start, end, new_text in reversed(replacements):
             full_text = full_text[:start] + new_text + full_text[end:]
         time_end = time.time()
-
-        log.info(
-            f"Processing wikilinks took {Fore.GREEN}{(time_end - time_start):.5f}{Style.RESET_ALL} seconds"
+        log.debug(
+            f"Snippet and wikilinks processing took {Fore.GREEN}{(time_end - time_start):.5f}{Style.RESET_ALL} seconds on file {Fore.GREEN}{rel_to_docs}{Style.RESET_ALL}"
         )
-
         if cache_filepath:
             log.debug(f"Writing wikilinks to cache for file {original_filepath}")
             try:
                 cache_filepath.parent.mkdir(parents=True, exist_ok=True)
                 cache_filepath.write_text(full_text)
```
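The hunks above replace the always-constructed `ignore_tree` with an optional `intervals_where_not_to_look` NCLS tree, so wikilinks are rewritten only when they fall outside code blocks, HTML comments, and script tags. Below is a minimal, self-contained sketch of that interval-masking pattern; it is illustrative only, with an invented sample text and regexes rather than the plugin's actual `WIKILINK_PATTERN` and ignore patterns:

```python
# Sketch: skip regex matches that fall inside "ignore" intervals, using NCLS.
import re

import numpy as np
from ncls import NCLS

text = "prose [[Real Link]] prose\n```\ncode [[Not A Link]] code\n```\n"

# Intervals to ignore: here, fenced code blocks found in one regex pass.
ignore = [(m.start(), m.end()) for m in re.finditer(r"```.*?```", text, re.DOTALL)]

intervals_where_not_to_look = None
if ignore:
    starts, ends = map(np.array, zip(*ignore))
    intervals_where_not_to_look = NCLS(starts, ends, np.arange(len(ignore)))

for m in re.finditer(r"\[\[(.+?)\]\]", text):
    # find_overlap yields the intervals overlapping [start, end); an empty
    # result means the wikilink lies outside every ignored region.
    if intervals_where_not_to_look is not None and not list(
        intervals_where_not_to_look.find_overlap(m.start(), m.end())
    ):
        print("process wikilink:", m.group(1))  # prints only "Real Link"
```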
mkdocs_juvix/env.py (16 changes: 16 additions & 0 deletions)
```diff
@@ -383,6 +383,22 @@ def update_hash_file(self, filepath: Path) -> Optional[Tuple[Path, str]]: # noq
             log.error(f"Error updating hash file: {e}")
             return None
 
+    def get_expected_filepath_for_juvix_markdown_output_in_cache(
+        self, filepath: Path
+    ) -> Optional[Path]:
+        cache_markdown_filename: Optional[str] = self.get_filename_module_by_extension(
+            filepath, extension=".md"
+        )
+        if cache_markdown_filename is None:
+            return None
+        rel_to_docs = filepath.relative_to(self.DOCS_ABSPATH)
+        cache_markdown_filepath: Path = (
+            self.CACHE_MARKDOWN_JUVIX_OUTPUT_PATH
+            / rel_to_docs.parent
+            / cache_markdown_filename
+        )
+        return cache_markdown_filepath
+
     def get_expected_filepath_for_juvix_isabelle_output_in_cache(
         self, filepath: Path
     ) -> Optional[Path]:
```
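The new helper mirrors the Isabelle variant that follows it: it derives the cached Markdown filename for a module and re-roots the file's docs-relative path under the Markdown cache directory. A hypothetical walk-through of that path arithmetic, where the concrete directories and the derived filename are assumptions for illustration rather than the plugin's real values:

```python
# Hypothetical illustration of the path mapping the new helper performs.
from pathlib import Path

DOCS_ABSPATH = Path("/project/docs")                           # assumed
CACHE_MARKDOWN_JUVIX_OUTPUT_PATH = Path("/project/.cache-md")  # assumed

filepath = Path("/project/docs/tutorials/Basics.juvix.md")
rel_to_docs = filepath.relative_to(DOCS_ABSPATH)  # tutorials/Basics.juvix.md

# Stand-in for get_filename_module_by_extension(filepath, extension=".md")
cache_markdown_filename = "Basics.md"

cache_markdown_filepath = (
    CACHE_MARKDOWN_JUVIX_OUTPUT_PATH / rel_to_docs.parent / cache_markdown_filename
)
print(cache_markdown_filepath)  # /project/.cache-md/tutorials/Basics.md
```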
mkdocs_juvix/images.py (42 changes: 16 additions & 26 deletions)
```diff
@@ -1,20 +1,19 @@
 import re
 import shutil
-import subprocess
 import time
-from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path
-from typing import Optional
+from typing import List, Optional
 
 import numpy as np
+import trio  # type: ignore
 from colorama import Fore, Style  # type: ignore
 from markdown.extensions import Extension  # type: ignore
 from markdown.preprocessors import Preprocessor  # type: ignore
 from mkdocs.config.defaults import MkDocsConfig  # type: ignore
 from mkdocs.plugins import BasePlugin, get_plugin_logger
 from mkdocs.structure.files import Files  # type: ignore
 from mkdocs.structure.pages import Page
-from ncls import NCLS  # type: ignore
+from ncls import NCLS
 
 from mkdocs_juvix.common.utils import fix_site_url  # type:ignore
 from mkdocs_juvix.env import ENV  # type: ignore
@@ -172,12 +171,12 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:
 
         dot_files = list(self.env.IMAGES_PATH.glob("*.dot"))
 
-        def process_dot_file(dot_file: Path):
+        async def process_dot_file(dot_file: Path):
             try:
                 cond = self.env.new_or_changed_or_not_exists(dot_file)
                 svg_file = dot_file.with_suffix(".dot.svg")
                 if cond:
-                    self._generate_dot_svg(dot_file)
+                    await self._generate_dot_svg(dot_file)
                     if svg_file.exists():
                         log.info(f"Generated SVG: {svg_file}")
                         self.env.update_hash_file(dot_file)
@@ -186,21 +185,18 @@ def process_dot_file(dot_file: Path):
                 log.error(f"Error generating SVG for {dot_file}: {e}")
                 return None
 
+        async def run_in_parallel(dot_files: List[Path]):
+            async with trio.open_nursery() as nursery:
+                for dot_file in dot_files:
+                    nursery.start_soon(process_dot_file, dot_file)
+
         if dot_files:
+            time_start = time.time()
+            trio.run(run_in_parallel, dot_files)
+            time_end = time.time()
             log.info(
-                f"Generating {Fore.GREEN}{len(dot_files)}{Style.RESET_ALL} SVG images"
+                f"SVG generation took {Fore.GREEN}{time_end - time_start:.5f}{Style.RESET_ALL} seconds"
             )
-            for dot_file in dot_files:
-                process_dot_file(dot_file)
-
-            with ThreadPoolExecutor() as executor:
-                results = list(executor.map(process_dot_file, dot_files))
-                executor.shutdown(wait=True)
-
-            for result in results:
-                if result is None:
-                    log.error("Failed to generate SVG for one of the DOT files")
-                    exit(1)
 
         imgext_instance = ImgExtension(config=config, env=self.env)
         config.markdown_extensions.append(imgext_instance)  # type: ignore
@@ -209,7 +205,7 @@ def process_dot_file(dot_file: Path):
         config.setdefault("current_page", None)  # current page being processed
         return config
 
-    def _generate_dot_svg(self, dot_file: Path) -> Optional[Path]:
+    async def _generate_dot_svg(self, dot_file: Path) -> Optional[Path]:
         svg_file = dot_file.with_suffix(".dot.svg")
 
         if not svg_file.exists():
@@ -224,13 +220,7 @@ def _generate_dot_svg(self, dot_file: Path) -> Optional[Path]:
             ]
 
             try:
-                time_start = time.time()
-                log.info(f"Generating SVG for {Fore.GREEN}{dot_file}{Style.RESET_ALL}")
-                output = subprocess.run(dot_cmd)
-                time_end = time.time()
-                log.info(
-                    f"Generation took {Fore.GREEN}{time_end - time_start:.5f}{Style.RESET_ALL} seconds"
-                )
+                output = await trio.run_process(dot_cmd)
                 if output.returncode != 0:
                     log.error(f"Error running graphviz: {output}")
                     return None
```
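This file is the heart of the commit: DOT-to-SVG generation moves from a sequential loop plus a `ThreadPoolExecutor` over blocking `subprocess.run` calls to trio's structured concurrency, where a nursery spawns one task per `.dot` file and `trio.run_process` runs Graphviz without blocking sibling tasks. A minimal standalone sketch of the same pattern; it assumes `trio` is installed and Graphviz's `dot` is on `PATH`, and simplifies discovery and error handling relative to the plugin:

```python
# Sketch: concurrent DOT -> SVG conversion with trio.
from pathlib import Path
from typing import List

import trio


async def process_dot_file(dot_file: Path) -> None:
    svg_file = dot_file.with_suffix(".dot.svg")
    # run_process awaits the subprocess without blocking other nursery tasks;
    # check=False so a non-zero exit is reported here instead of raising.
    result = await trio.run_process(
        ["dot", "-Tsvg", str(dot_file), "-o", str(svg_file)], check=False
    )
    if result.returncode != 0:
        print(f"graphviz failed on {dot_file}")


async def run_in_parallel(dot_files: List[Path]) -> None:
    # The nursery waits for every spawned task before the block exits.
    async with trio.open_nursery() as nursery:
        for dot_file in dot_files:
            nursery.start_soon(process_dot_file, dot_file)


if __name__ == "__main__":
    trio.run(run_in_parallel, list(Path(".").glob("*.dot")))
```

One design note: unlike the removed `ThreadPoolExecutor` version, the nursery gives cancellation and error propagation for free; if one task raises, the siblings are cancelled and the exception surfaces from `trio.run`.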
[Diff for the remaining 4 changed files was not loaded.]
