Skip to content

Commit

Permalink
Allow up to 6 levels of headers in the table of content and allow to …
Browse files Browse the repository at this point in the history
…ignore h1 header

Any number of levels is supported, but since official HTML
supports only 6 levels (`h1` ... `h6`), an artificial limitation was added.

This change affects two options: `toc_level` and `ordered_chapter_level`.
Both can be set to a value between 0 and 6 (inclusive) where
0 means the feature is disabled.

If either of the options is set to a value greater than 6 then
6 levels of headers are processed and a warning is emitted.

This change also adds option 'ignore_top_header' to exclude h1
headers from the table of content and numbering.
  • Loading branch information
oliora committed Oct 24, 2024
1 parent ccf20e5 commit 1b38e06
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 82 deletions.
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ This plugin is inspired by [MkDocs PDF Export Plugin][mkdocs-pdf-export-plugin].
## Features

* Cover and Table of Contents integrated in the PDF
* Automatically numbers on heading(h1-h3).
* Automatically numbers headings (h1-h6).
* Shift down sub-page headings level.
* using [WeasyPrint][weasyprint].

Expand Down Expand Up @@ -80,6 +80,7 @@ plugins:
#excludes_children:
# - 'release-notes/:upgrading'
# - 'release-notes/:changelog'
#ignore_top_header: false
#
#exclude_pages:
# - 'bugs/'
Expand Down Expand Up @@ -181,19 +182,25 @@ plugins:
* `toc_level`
Set the level of _Table of Content_. This value is enabled in the range of from `1` to `3`.
Set the depth of the _Table of Contents_. Valid values range from `1` to `6`; `0` disables it.
**default**: `2`
* `ordered_chapter_level`
Set the level of heading number addition. This value is enabled in the range of from `1` to `3`.
Set the deepest heading level that receives a chapter number. Valid values range from `1` to `6`; `0` disables numbering.
**default**: `3`
* `excludes_children`
Set the page `id` of `nav` url. If the `id` matches in this list, it will be excluded from the heading number addition and table of contents.
**default**: `[]`
* `ignore_top_header`
Set this value to `true` to exclude `h1` headers from numbering (`ordered_chapter_level`) and
from the _Table of Contents_ (`toc_level`).
**default**: `false`
##### for Page
* `exclude_pages`
Expand Down
2 changes: 2 additions & 0 deletions mkdocs_with_pdf/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class Options(object):
('toc_level', config_options.Type(int, default=2)),
('ordered_chapter_level', config_options.Type(int, default=3)),
('excludes_children', config_options.Type(list, default=[])),
('ignore_top_header', config_options.Type(bool, default=False)),

('exclude_pages', config_options.Type(list, default=[])),
('convert_iframe', config_options.Type(list, default=[])),
Expand Down Expand Up @@ -81,6 +82,7 @@ def __init__(self, local_config, config, logger: logging):
self.toc_level = local_config['toc_level']
self.ordered_chapter_level = local_config['ordered_chapter_level']
self.excludes_children = local_config['excludes_children']
self.ignore_top_header = local_config['ignore_top_header']

# Page
self.exclude_pages = local_config['exclude_pages']
Expand Down
189 changes: 110 additions & 79 deletions mkdocs_with_pdf/toc.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,44 @@
from bs4 import PageElement, Tag
from dataclasses import dataclass, field
from typing import Any, List, Tuple
from bs4 import BeautifulSoup, Tag

from .options import Options
from .utils.soup_util import clone_element


def make_indexes(soup: PageElement, options: Options) -> None:
_MAX_HEADER_LEVEL = 6 # <h1> ... <h6>


@dataclass
class _HeaderTree:
""" Normalized tree of document headers. Missed levels have `element` set to `None` """
element: Tag | None
subheaders: List['_HeaderTree'] = field(default_factory=list)


def make_indexes(soup: BeautifulSoup, options: Options) -> None:
""" Generate ordered chapter number and TOC of document.
Arguments:
soup {BeautifulSoup} -- DOM object of Document.
options {Options} -- The options of this sequence.
"""

# Step 1: (re)ordered headdings
# Step 1: (re)ordered headings
_inject_heading_order(soup, options)

# Step 2: generate toc page
level = options.toc_level
if level < 1 or level > 3:
start_level = 1 if options.ignore_top_header else 0
stop_level = options.toc_level
if stop_level <= start_level:
return
if stop_level > _MAX_HEADER_LEVEL:
options.logger.warning(f'Ignore `toc_level` value {stop_level}. Use max possible {_MAX_HEADER_LEVEL} instead')
stop_level = _MAX_HEADER_LEVEL

options.logger.info(
f'Generate a table of contents up to heading level {level}.')
options.logger.info(f'Generate a table of contents from h{start_level + 1} to h{stop_level}')

h1li = None
h2ul = h2li = h3ul = None
exclude_lv2 = exclude_lv3 = False

def makeLink(h: Tag) -> Tag:
def make_link(h: Tag) -> Tag:
li = soup.new_tag('li')
ref = h.get('id', '')
a = soup.new_tag('a', href=f'#{ref}')
Expand All @@ -40,98 +51,118 @@ def makeLink(h: Tag) -> Tag:
options.logger.debug(f"| [{h.get_text(separator=' ')}]({ref})")
return li

def create_toc(headers: List[_HeaderTree], parent: Tag):
    """ Append a `<ul>` to `parent` with one `<li>` per entry of `headers`.

    Arguments:
        headers {List[_HeaderTree]} -- Headers to list at this nesting depth.
        parent {Tag} -- Element that receives the new `<ul>`.
    """
    listing = soup.new_tag('ul')
    parent.append(listing)
    for node in headers:
        if node.element is None:
            # A missed level has no heading to link to: keep an empty
            # `<li>` so that its sub-headers stay at the right depth.
            options.logger.warning('Adding missed header to TOC')
            item = soup.new_tag('li')
        else:
            item = make_link(node.element)
        listing.append(item)
        if node.subheaders:
            # Sub-headers are nested inside the `<li>` of their parent.
            create_toc(node.subheaders, item)

top_headers = _collect_headers(soup, options, start_level, stop_level)

toc = soup.new_tag('article', id='doc-toc')
title = soup.new_tag('h1')
title.append(soup.new_string(options.toc_title))
toc.append(title)

h1ul = soup.new_tag('ul')
toc.append(h1ul)

headings = soup.find_all(['h1', 'h2', 'h3'])
for h in headings:
create_toc(top_headers, toc)
soup.body.insert(0, toc)

if h.name == 'h1':

h1li = makeLink(h)
h1ul.append(h1li)
h2ul = h2li = h3ul = None
def _set_list_elements(l: List[Any], value: Any, start: int, end: int | None = None) -> None:
for i in range(start, end if end is not None else len(l)):
l[i] = value

exclude_lv2 = _is_exclude(h.get('id', None), options)

elif not exclude_lv2 and h.name == 'h2' and level >= 2:
def _collect_headers(soup: BeautifulSoup, options: Options, start_level: int, stop_level: int) -> List[_HeaderTree]:
"""Collect document headers.
Returns a list of top headers with their subheaders
Levels are counted from zero i.e. zero level corresponds to h1
"""
assert 0 <= start_level < stop_level
assert 0 < stop_level <= _MAX_HEADER_LEVEL

if not h2ul:
h2ul = soup.new_tag('ul')
h1li.append(h2ul)
h2li = makeLink(h)
h2ul.append(h2li)
h3ul = None
top_headers: List[_HeaderTree] = []

exclude_lv3 = _is_exclude(h.get('id', None), options)
header_levels: List[_HeaderTree | None] = [None] * stop_level
exclude_levels: List[bool] = [False] * stop_level

elif not exclude_lv2 and not exclude_lv3 \
and h.name == 'h3' and level >= 3:
html_headers = soup.find_all([f'h{i + 1}' for i in range(start_level, stop_level)])
for h in html_headers:
level = int(h.name[1:]) - 1

if not h2li:
continue
if not h3ul:
h3ul = soup.new_tag('ul')
h2li.append(h3ul)
h3li = makeLink(h)
h3ul.append(h3li)
exclude_levels[level] = _is_exclude(h.get('id', None), options)
_set_list_elements(exclude_levels, False, level + 1)

else:
if any(exclude_levels[:level]):
continue
pass

soup.body.insert(0, toc)

header = _HeaderTree(h)

def _inject_heading_order(soup: Tag, options: Options):

level = options.ordered_chapter_level
if level < 1 or level > 3:
return

options.logger.info(f'Number headings up to level {level}.')

h1n = h2n = h3n = 0
exclude_lv2 = exclude_lv3 = False

headings = soup.find_all(['h1', 'h2', 'h3'])
for h in headings:

if h.name == 'h1':

h1n += 1
h2n = h3n = 0
prefix = f'{h1n}. '

exclude_lv2 = _is_exclude(h.get('id', None), options)
if level == start_level:
top_headers.append(header)
else:
parent_header = header_levels[level - 1]
if parent_header is None:
# Add skipped levels
for i in range(start_level, level):
if header_levels[i] is not None:
continue

elif not exclude_lv2 and h.name == 'h2' and level >= 2:
missed_header = _HeaderTree(None)
if i == start_level:
top_headers.append(missed_header)
else:
parent_header = header_levels[i - 1]
assert parent_header is not None
parent_header.subheaders.append(missed_header)
header_levels[i] = missed_header

h2n += 1
h3n = 0
prefix = f'{h1n}.{h2n} '
parent_header = header_levels[level - 1]

exclude_lv3 = _is_exclude(h.get('id', None), options)
assert parent_header is not None
parent_header.subheaders.append(header)

elif not exclude_lv2 and not exclude_lv3 \
and h.name == 'h3' and level >= 3:
header_levels[level] = header
_set_list_elements(header_levels, None, level + 1)

h3n += 1
prefix = f'{h1n}.{h2n}.{h3n} '
return top_headers

else:
continue

options.logger.debug(f"| [{prefix} {h.text}]({h.get('id', '(none)')})")
def _inject_heading_order(soup: BeautifulSoup, options: Options) -> None:
start_level = 1 if options.ignore_top_header else 0
stop_level = options.ordered_chapter_level
if stop_level <= start_level:
return
if stop_level > _MAX_HEADER_LEVEL:
options.logger.warning(f'Ignore `ordered_chapter_level` value {stop_level}. Use max possible {_MAX_HEADER_LEVEL} instead')
stop_level = _MAX_HEADER_LEVEL

options.logger.info(f'Number headers from h{start_level + 1} to h{stop_level}')

def inject_order(headers: List[_HeaderTree], numbers_prefix: 'List[int] | None' = None):
    """ Prepend a chapter number to every header of the tree.

    Arguments:
        headers {List[_HeaderTree]} -- Sibling headers; they are numbered 1, 2, ...
        numbers_prefix {List[int] | None} -- Numbers of the ancestors,
            outermost first. `None` (the default) means no ancestors.

    The number is the dot-joined chain of ancestor numbers plus the header's
    own position, e.g. `2.1.3`, and is inserted as the first child of the
    heading inside a `<span class="pdf-order">`.
    """
    # `None` instead of a `[]` default: a mutable default is shared
    # between all calls of the function.
    if numbers_prefix is None:
        numbers_prefix = []
    assert len(numbers_prefix) < _MAX_HEADER_LEVEL
    for position, header in enumerate(headers, start=1):
        prefix = numbers_prefix + [position]
        prefix_str = '.'.join(str(n) for n in prefix)
        element = header.element
        if element is not None:
            # Log the heading text rather than the full tag markup; this is
            # done before the number span is inserted, so the text is the
            # original title.
            options.logger.debug(f"| [{prefix_str} {element.get_text(separator=' ')}]({element.get('id', '(none)')})")
            nm_tag = soup.new_tag('span', **{'class': 'pdf-order'})
            nm_tag.append(prefix_str + ' ')
            element.insert(0, nm_tag)
        else:
            # A missed level still consumes a number so that its
            # sub-headers keep a complete prefix.
            options.logger.warning(f'Assigned number for a missed header {prefix_str}')
        if header.subheaders:
            inject_order(header.subheaders, prefix)

nm_tag = soup.new_tag('span', **{'class': 'pdf-order'})
nm_tag.append(prefix)
h.insert(0, nm_tag)
top_headers = _collect_headers(soup, options, start_level, stop_level)
inject_order(top_headers)


def _is_exclude(url: str, options: Options) -> bool:
Expand Down

0 comments on commit 1b38e06

Please sign in to comment.