Skip to content

Commit

Permalink
Allow up to 6 levels of headers in the table of content
Browse files Browse the repository at this point in the history
Any number of levels could be supported, but since HTML officially defines
only 6 heading levels (`h1` ... `h6`), an artificial limit was added.

This change affects two options: `toc_level` and `ordered_chapter_level`.
Both can be set to a value between 0 and 6 (inclusive) where
0 means the feature is disabled.

If either of the options is set to a value greater than 6 then
6 levels of headers are processed and a warning is emitted.
  • Loading branch information
oliora committed Oct 24, 2024
1 parent ccf20e5 commit 1953719
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 82 deletions.
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ This plugin is inspired by [MkDocs PDF Export Plugin][mkdocs-pdf-export-plugin].
## Features

* Cover and Table of Contents integrated in the PDF
* Automatically numbers on heading(h1-h3).
* Automatically numbers headings (h1-h6).
* Shift down sub-page headings level.
* using [WeasyPrint][weasyprint].

Expand Down Expand Up @@ -80,6 +80,7 @@ plugins:
#excludes_children:
# - 'release-notes/:upgrading'
# - 'release-notes/:changelog'
#ignore_top_header: false
#
#exclude_pages:
# - 'bugs/'
Expand Down Expand Up @@ -181,19 +182,25 @@ plugins:
* `toc_level`
Set the level of _Table of Content_. This value is enabled in the range of from `1` to `3`.
Set the level of _Table of Content_. Valid values range from `1` to `6`.
**default**: `2`
* `ordered_chapter_level`
Set the level of heading number addition. This value is enabled in the range of from `1` to `3`.
Set the level of heading number addition. Valid values range from `1` to `6`.
**default**: `3`
* `excludes_children`
Set the page `id` of `nav` url. If the `id` matches in this list, it will be excluded from the heading number addition and table of contents.
**default**: `[]`
* `ignore_top_header`
Set this value to `true` to exclude `h1` headers from numbering (`ordered_chapter_level`) and
from the _Table of Content_ (`toc_level`).
**default**: `false`
##### for Page
* `exclude_pages`
Expand Down
2 changes: 2 additions & 0 deletions mkdocs_with_pdf/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class Options(object):
('toc_level', config_options.Type(int, default=2)),
('ordered_chapter_level', config_options.Type(int, default=3)),
('excludes_children', config_options.Type(list, default=[])),
('ignore_top_header', config_options.Type(bool, default=False)),

('exclude_pages', config_options.Type(list, default=[])),
('convert_iframe', config_options.Type(list, default=[])),
Expand Down Expand Up @@ -81,6 +82,7 @@ def __init__(self, local_config, config, logger: logging):
self.toc_level = local_config['toc_level']
self.ordered_chapter_level = local_config['ordered_chapter_level']
self.excludes_children = local_config['excludes_children']
self.ignore_top_header = local_config['ignore_top_header']

# Page
self.exclude_pages = local_config['exclude_pages']
Expand Down
189 changes: 110 additions & 79 deletions mkdocs_with_pdf/toc.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,44 @@
from bs4 import PageElement, Tag
from dataclasses import dataclass, field
from typing import Any, List, Tuple
from bs4 import BeautifulSoup, Tag

from .options import Options
from .utils.soup_util import clone_element


def make_indexes(soup: PageElement, options: Options) -> None:
_MAX_HEADER_LEVEL = 6 # <h1> ... <h6>


@dataclass
class _HeaderTree:
    """Node of the normalized tree of document headers.

    A heading level that is missing from the document is represented by a
    placeholder node whose `element` is `None`.
    """
    # The heading tag this node stands for, or None for a placeholder node.
    element: Tag | None
    # Nodes nested directly under this one; each instance gets its own list.
    subheaders: List['_HeaderTree'] = field(default_factory=list)


def make_indexes(soup: BeautifulSoup, options: Options) -> None:
""" Generate ordered chapter number and TOC of document.
Arguments:
soup {BeautifulSoup} -- DOM object of Document.
options {Options} -- The options of this sequence.
"""

# Step 1: (re)ordered headdings
# Step 1: (re)ordered headings
_inject_heading_order(soup, options)

# Step 2: generate toc page
level = options.toc_level
if level < 1 or level > 3:
start_level = 1 if options.ignore_top_header else 0
stop_level = options.toc_level
if stop_level <= start_level:
return
if stop_level > _MAX_HEADER_LEVEL:
options.logger.warning(f'Ignore `toc_level` value {stop_level}. Use max possible {_MAX_HEADER_LEVEL} instead')
stop_level = _MAX_HEADER_LEVEL

options.logger.info(
f'Generate a table of contents up to heading level {level}.')
options.logger.info(f'Generate a table of contents from h{start_level + 1} to h{stop_level}')

h1li = None
h2ul = h2li = h3ul = None
exclude_lv2 = exclude_lv3 = False

def makeLink(h: Tag) -> Tag:
def make_link(h: Tag) -> Tag:
li = soup.new_tag('li')
ref = h.get('id', '')
a = soup.new_tag('a', href=f'#{ref}')
Expand All @@ -40,98 +51,118 @@ def makeLink(h: Tag) -> Tag:
options.logger.debug(f"| [{h.get_text(separator=' ')}]({ref})")
return li

def create_toc(headers: List[_HeaderTree], parent: Tag):
    """Append a nested `<ul>` describing `headers` to `parent`.

    Every node becomes one `<li>`: a link built by `make_link` when the
    node has a real heading element, or an empty `<li>` (with a warning)
    when the node is a placeholder for a missing level. Sub-headers are
    rendered recursively inside that `<li>`.

    Arguments:
        headers {List[_HeaderTree]} -- Sibling nodes to render.
        parent {Tag} -- Element that receives the new `<ul>`.
    """
    ul_tag = soup.new_tag('ul')
    parent.append(ul_tag)
    for header in headers:
        if header.element is not None:
            link_tag = make_link(header.element)
        else:
            # Plain literal: there is nothing to interpolate here.
            options.logger.warning('Adding missed header to TOC')
            link_tag = soup.new_tag('li')
        ul_tag.append(link_tag)
        if header.subheaders:
            create_toc(header.subheaders, link_tag)

top_headers = _collect_headers(soup, options, start_level, stop_level)

toc = soup.new_tag('article', id='doc-toc')
title = soup.new_tag('h1')
title.append(soup.new_string(options.toc_title))
toc.append(title)

h1ul = soup.new_tag('ul')
toc.append(h1ul)

headings = soup.find_all(['h1', 'h2', 'h3'])
for h in headings:
create_toc(top_headers, toc)
soup.body.insert(0, toc)

if h.name == 'h1':

h1li = makeLink(h)
h1ul.append(h1li)
h2ul = h2li = h3ul = None
def _set_list_elements(l: List[Any], value: Any, start: int, end: int | None = None) -> None:
for i in range(start, end if end is not None else len(l)):
l[i] = value

exclude_lv2 = _is_exclude(h.get('id', None), options)

elif not exclude_lv2 and h.name == 'h2' and level >= 2:
def _collect_headers(soup: BeautifulSoup, options: Options, start_level: int, stop_level: int) -> List[_HeaderTree]:
"""Collect document headers.
Returns a list of top headers with their subheaders.
Levels are counted from zero i.e. zero level corresponds to h1
"""
assert 0 <= start_level < stop_level
assert 0 < stop_level <= _MAX_HEADER_LEVEL

if not h2ul:
h2ul = soup.new_tag('ul')
h1li.append(h2ul)
h2li = makeLink(h)
h2ul.append(h2li)
h3ul = None
top_headers: List[_HeaderTree] = []

exclude_lv3 = _is_exclude(h.get('id', None), options)
header_levels: List[_HeaderTree | None] = [None] * stop_level
exclude_levels: List[bool] = [False] * stop_level

elif not exclude_lv2 and not exclude_lv3 \
and h.name == 'h3' and level >= 3:
html_headers = soup.find_all([f'h{i + 1}' for i in range(start_level, stop_level)])
for h in html_headers:
level = int(h.name[1:]) - 1

if not h2li:
continue
if not h3ul:
h3ul = soup.new_tag('ul')
h2li.append(h3ul)
h3li = makeLink(h)
h3ul.append(h3li)
exclude_levels[level] = _is_exclude(h.get('id', None), options)
_set_list_elements(exclude_levels, False, level + 1)

else:
if any(exclude_levels[:level]):
continue
pass

soup.body.insert(0, toc)

header = _HeaderTree(h)

def _inject_heading_order(soup: Tag, options: Options):

level = options.ordered_chapter_level
if level < 1 or level > 3:
return

options.logger.info(f'Number headings up to level {level}.')

h1n = h2n = h3n = 0
exclude_lv2 = exclude_lv3 = False

headings = soup.find_all(['h1', 'h2', 'h3'])
for h in headings:

if h.name == 'h1':

h1n += 1
h2n = h3n = 0
prefix = f'{h1n}. '

exclude_lv2 = _is_exclude(h.get('id', None), options)
if level == start_level:
top_headers.append(header)
else:
parent_header = header_levels[level - 1]
if parent_header is None:
# Add skipped levels
for i in range(start_level, level):
if header_levels[i] is not None:
continue

elif not exclude_lv2 and h.name == 'h2' and level >= 2:
missed_header = _HeaderTree(None)
if i == start_level:
top_headers.append(missed_header)
else:
parent_header = header_levels[i - 1]
assert parent_header is not None
parent_header.subheaders.append(missed_header)
header_levels[i] = missed_header

h2n += 1
h3n = 0
prefix = f'{h1n}.{h2n} '
parent_header = header_levels[level - 1]

exclude_lv3 = _is_exclude(h.get('id', None), options)
assert parent_header is not None
parent_header.subheaders.append(header)

elif not exclude_lv2 and not exclude_lv3 \
and h.name == 'h3' and level >= 3:
header_levels[level] = header
_set_list_elements(header_levels, None, level + 1)

h3n += 1
prefix = f'{h1n}.{h2n}.{h3n} '
return top_headers

else:
continue

options.logger.debug(f"| [{prefix} {h.text}]({h.get('id', '(none)')})")
def _inject_heading_order(soup: BeautifulSoup, options: Options) -> None:
start_level = 1 if options.ignore_top_header else 0
stop_level = options.ordered_chapter_level
if stop_level <= start_level:
return
if stop_level > _MAX_HEADER_LEVEL:
options.logger.warning(f'Ignore `ordered_chapter_level` value {stop_level}. Use max possible {_MAX_HEADER_LEVEL} instead')
stop_level = _MAX_HEADER_LEVEL

options.logger.info(f'Number headers from h{start_level + 1} to h{stop_level}')

def inject_order(headers: List[_HeaderTree], numbers_prefix: Tuple[int, ...] = ()):
    """Prepend a dotted chapter number to each header in `headers`, recursively.

    The number of a node is the parent's number (`numbers_prefix`) followed
    by the node's 1-based position among its siblings. Placeholder nodes
    (no heading element) still consume a number, which is reported with a
    warning, so that their sub-headers keep a consistent numbering.

    Arguments:
        headers {List[_HeaderTree]} -- Sibling nodes to number.
        numbers_prefix {Tuple[int, ...]} -- Number components of the parent
            node; empty for top-level headers. An immutable default is used
            so that no state can be shared between calls.
    """
    assert len(numbers_prefix) < _MAX_HEADER_LEVEL
    for i, header in enumerate(headers):
        prefix = (*numbers_prefix, i + 1)
        prefix_str = '.'.join(str(n) for n in prefix)
        if header.element is not None:
            options.logger.debug(f"| [{prefix_str} {header.element}]({header.element.get('id', '(none)')})")
            nm_tag = soup.new_tag('span', **{'class': 'pdf-order'})
            nm_tag.append(prefix_str + ' ')
            header.element.insert(0, nm_tag)
        else:
            options.logger.warning(f'Assigned number for a missed header {prefix_str}')
        if header.subheaders:
            inject_order(header.subheaders, prefix)

nm_tag = soup.new_tag('span', **{'class': 'pdf-order'})
nm_tag.append(prefix)
h.insert(0, nm_tag)
top_headers = _collect_headers(soup, options, start_level, stop_level)
inject_order(top_headers)


def _is_exclude(url: str, options: Options) -> bool:
Expand Down

0 comments on commit 1953719

Please sign in to comment.