Skip to content

Commit

Permalink
Merge pull request #163 from wasd96040501/feat/markdown
Browse files Browse the repository at this point in the history
Add markdown support
  • Loading branch information
smacker authored Jun 14, 2024
2 parents c5d1f3f + bfabf0b commit 0ac8d7d
Show file tree
Hide file tree
Showing 15 changed files with 137,723 additions and 0 deletions.
11 changes: 11 additions & 0 deletions _automation/grammars.json
Original file line number Diff line number Diff line change
Expand Up @@ -322,5 +322,16 @@
"reference": "v0.5.0",
"revision": "6129a83eeec7d6070b1c0567ec7ce3509ead607c",
"updateBasedOn": "tag"
},
{
"language": "markdown",
"url": "https://github.com/tree-sitter-grammars/tree-sitter-markdown",
"files": [
"parser.c",
"scanner.c"
],
"reference": "v0.2.3",
"revision": "62516e8c78380e3b51d5b55727995d2c511436d8",
"updateBasedOn": "tag"
}
]
30 changes: 30 additions & 0 deletions _automation/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,8 @@ func (s *UpdateService) downloadGrammar(ctx context.Context, g *Grammar) {
s.downloadYaml(ctx, g)
case "php":
s.downloadPhp(ctx, g)
case "markdown":
s.downloadMarkdown(ctx, g)
default:
s.defaultGrammarDownload(ctx, g)
}
Expand Down Expand Up @@ -434,6 +436,34 @@ func (s *UpdateService) downloadTypescript(ctx context.Context, g *Grammar) {
}
}

// downloadMarkdown fetches the markdown grammar sources. Markdown is special
// because the upstream repository contains two separate grammars (block and
// inline); each one is downloaded into its own sub-directory of g.Language.
//
// For every grammar the shared parser.h header is fetched first, followed by
// each file listed in g.Files. The map passed for the source files maps the
// `"tree_sitter/parser.h"` include path to `"parser.h"`.
// NOTE(review): presumably downloadFile applies the map as textual
// replacements to the downloaded content — confirm against downloadFile.
func (s *UpdateService) downloadMarkdown(ctx context.Context, g *Grammar) {
	url := g.ContentURL()

	for _, lang := range [...]string{"tree-sitter-markdown", "tree-sitter-markdown-inline"} {
		// Local target directory and remote source directory for this grammar.
		outDir := fmt.Sprintf("%s/%s", g.Language, lang)
		srcURL := fmt.Sprintf("%s/%s/%s/src", url, g.Revision, lang)

		s.makeDir(ctx, outDir)

		s.downloadFile(ctx, srcURL+"/tree_sitter/parser.h", outDir+"/parser.h", nil)

		for _, f := range g.Files {
			remote := fmt.Sprintf("%s/%s", srcURL, f)
			local := fmt.Sprintf("%s/%s", outDir, f)
			s.downloadFile(ctx, remote, local, map[string]string{
				`"tree_sitter/parser.h"`: `"parser.h"`,
			})
		}
	}
}

// For the yaml grammar, scanner.cc includes the schema.generated.cc file, which
// causes cgo to compile schema.generated.cc twice and fail with duplicate symbol errors.
func (s *UpdateService) downloadYaml(ctx context.Context, g *Grammar) {
Expand Down
13 changes: 13 additions & 0 deletions bindings.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,10 @@ func (t SymbolType) String() string {
return symbolTypeNames[t]
}

// ID returns the id field of the underlying C node converted to a uintptr.
// NOTE(review): presumably this identifies the node within its tree and is
// therefore usable as a map key — confirm against the tree-sitter TSNode docs.
func (n Node) ID() uintptr {
	id := n.c.id
	return uintptr(id)
}

// StartByte returns the node's start byte.
func (n Node) StartByte() uint32 {
return uint32(C.ts_node_start_byte(n.c))
Expand Down Expand Up @@ -433,6 +437,15 @@ func (n Node) EndPoint() Point {
}
}

// Range returns the node's extent as a Range, combining its start/end byte
// offsets with its start/end points.
func (n Node) Range() Range {
	var r Range
	r.StartByte, r.EndByte = n.StartByte(), n.EndByte()
	r.StartPoint, r.EndPoint = n.StartPoint(), n.EndPoint()
	return r
}

// Symbol returns the node's type as a Symbol.
func (n Node) Symbol() Symbol {
return C.ts_node_symbol(n.c)
Expand Down
156 changes: 156 additions & 0 deletions markdown/binding.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package markdown

import (
"context"

sitter "github.com/smacker/go-tree-sitter"
tree_sitter_markdown "github.com/smacker/go-tree-sitter/markdown/tree-sitter-markdown"
tree_sitter_markdown_inline "github.com/smacker/go-tree-sitter/markdown/tree-sitter-markdown-inline"
)

// MarkdownTree bundles the result of parsing a markdown document with the two
// markdown grammars: one tree for the block structure and one tree per
// `inline` node found in that block tree.
type MarkdownTree struct {
	// blockTree is the tree produced by the block-level markdown grammar.
	blockTree *sitter.Tree
	// inlineTrees holds the trees produced by the inline grammar, in the
	// order the `inline` nodes were matched in the block tree.
	inlineTrees []*sitter.Tree
	// inlineIndices maps the ID of an `inline` node of the block tree to the
	// index of its tree in inlineTrees.
	inlineIndices map[uintptr]int
}

// Edit applies the given edit to the block tree first and then to every
// inline tree, in order.
func (t *MarkdownTree) Edit(edit sitter.EditInput) {
	t.blockTree.Edit(edit)
	for i := range t.inlineTrees {
		t.inlineTrees[i].Edit(edit)
	}
}

// BlockTree returns the tree produced by the block-level markdown grammar.
func (t *MarkdownTree) BlockTree() *sitter.Tree {
	block := t.blockTree
	return block
}

// InlineTree returns the inline tree registered for the given block-tree node.
// It returns nil when parent is nil or when no inline tree is registered
// under parent's ID.
func (t *MarkdownTree) InlineTree(parent *sitter.Node) *sitter.Tree {
	if parent != nil {
		if i, found := t.inlineIndices[parent.ID()]; found {
			return t.inlineTrees[i]
		}
	}

	return nil
}

// InlineRootNode returns the root node of the inline tree registered for the
// given block-tree node, or nil when InlineTree finds no tree for it.
func (t *MarkdownTree) InlineRootNode(parent *sitter.Node) *sitter.Node {
	if inline := t.InlineTree(parent); inline != nil {
		return inline.RootNode()
	}

	return nil
}

// InlineTrees returns all inline trees. The returned slice is the tree's own
// slice, not a copy.
func (t *MarkdownTree) InlineTrees() []*sitter.Tree {
	trees := t.inlineTrees
	return trees
}

// Iter walks the named nodes of the block tree depth-first, starting at the
// root, and calls f for each node before descending into its children. Each
// node is passed together with the root of its inline tree, if it has one.
// The walk stops as soon as f returns false.
func (t *MarkdownTree) Iter(f func(node *Node) bool) {
	root := t.blockTree.RootNode()
	start := &Node{
		Node:   root,
		Inline: t.InlineRootNode(root),
	}
	t.iter(start, f)
}

// iter calls f on node and then recurses into node's named children in order.
// It reports whether the walk should continue: false as soon as f returns
// false for node or for any descendant, true otherwise.
func (t *MarkdownTree) iter(node *Node, f func(node *Node) bool) (goNext bool) {
	if !f(node) {
		return false
	}

	for i, n := 0, int(node.NamedChildCount()); i < n; i++ {
		child := node.NamedChild(i)
		wrapped := &Node{Node: child, Inline: t.InlineRootNode(child)}

		if !t.iter(wrapped, f) {
			return false
		}
	}

	return true
}

// Node is a block-tree node paired with the root of its inline tree.
type Node struct {
	// Node is the embedded node of the block tree.
	*sitter.Node
	// Inline is the root node of the inline tree registered for Node, or nil
	// when Node has no inline tree.
	Inline *sitter.Node
}

// ParseCtx parses content as markdown and returns the resulting MarkdownTree.
//
// The document is parsed in two passes: first with the block grammar, then
// every `inline` node of the block tree is parsed with the inline grammar,
// restricted to the ranges computed by inlineRanges.
//
// oldTree may be nil. When it is not nil, its block tree is passed as the old
// tree for the block parse, and its inline trees are passed as old trees for
// the inline parses, paired by position (the i-th matched inline node reuses
// the i-th old inline tree, if there is one).
//
// ParseCtx returns a nil tree and the error if any parse or the query
// construction fails.
func ParseCtx(ctx context.Context, oldTree *MarkdownTree, content []byte) (*MarkdownTree, error) {
	p := sitter.NewParser()
	p.SetLanguage(tree_sitter_markdown.GetLanguage())

	var oldBlock *sitter.Tree
	if oldTree != nil {
		oldBlock = oldTree.blockTree
	}
	tree, err := p.ParseCtx(ctx, oldBlock, content)
	if err != nil {
		return nil, err
	}

	res := &MarkdownTree{
		blockTree:     tree,
		inlineTrees:   []*sitter.Tree{},
		inlineIndices: map[uintptr]int{},
	}

	// From here on the parser is only used for inline content.
	p.SetLanguage(tree_sitter_markdown_inline.GetLanguage())

	q, err := sitter.NewQuery([]byte(`(inline) @inline`), tree_sitter_markdown.GetLanguage())
	if err != nil {
		return nil, err
	}

	qc := sitter.NewQueryCursor()
	// Deferred so the cursor is also released on the error return below.
	defer qc.Close()
	qc.Exec(q, tree.RootNode())

	for {
		match, ok := qc.NextMatch()
		if !ok {
			break
		}

		for _, capture := range match.Captures {
			p.SetIncludedRanges(inlineRanges(capture.Node))

			// The index of the tree about to be appended.
			idx := len(res.inlineTrees)

			var oldInline *sitter.Tree
			if oldTree != nil && idx < len(oldTree.inlineTrees) {
				oldInline = oldTree.inlineTrees[idx]
			}

			inlineTree, err := p.ParseCtx(ctx, oldInline, content)
			if err != nil {
				return nil, err
			}

			res.inlineTrees = append(res.inlineTrees, inlineTree)
			res.inlineIndices[capture.Node.ID()] = idx
		}
	}

	return res, nil
}

// inlineRanges returns the ranges of node that should be handed to the inline
// parser: node's own range with the ranges of its named children cut out.
//
// Each emitted range ends where the next named child STARTS and the following
// range begins where that child ENDS, so the child's text is excluded. Ending
// the range at the child's end instead would make the ranges contiguous and
// feed the child's text to the inline grammar.
// NOTE(review): the named children of an `inline` block node are presumably
// block-level markers (e.g. block continuations) — confirm against the grammar.
func inlineRanges(node *sitter.Node) []sitter.Range {
	rest := node.Range()
	childCount := int(node.NamedChildCount())

	ranges := make([]sitter.Range, 0, childCount+1)
	for i := 0; i < childCount; i++ {
		child := node.NamedChild(i).Range()
		ranges = append(ranges, sitter.Range{
			StartPoint: rest.StartPoint,
			StartByte:  rest.StartByte,
			EndPoint:   child.StartPoint,
			EndByte:    child.StartByte,
		})

		rest.StartPoint = child.EndPoint
		rest.StartByte = child.EndByte
	}

	// Whatever remains after the last named child (or the whole node when it
	// has no named children).
	return append(ranges, rest)
}
101 changes: 101 additions & 0 deletions markdown/binding_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package markdown_test

import (
"context"
"testing"

"github.com/smacker/go-tree-sitter/markdown"
"github.com/stretchr/testify/assert"
)

// TestMarkdown checks the block tree and both inline trees produced for a
// small document consisting of a heading and a list item containing an image.
func TestMarkdown(t *testing.T) {
	assert := assert.New(t)

	content := "# Hello\n- This is a image: ![image](https://example.com/image.jpg \"a image\")"
	tree, err := markdown.ParseCtx(context.Background(), nil, []byte(content))
	// Stop here on failure: tree is nil and the calls below would panic.
	if !assert.NoError(err) {
		return
	}

	assert.Equal(
		"(document (section (atx_heading (atx_h1_marker) heading_content: (inline)) (list (list_item (list_marker_minus) (paragraph (inline))))))",
		tree.BlockTree().RootNode().String(),
	)

	// Guard the indexing below so a wrong count fails instead of panicking.
	inlineTrees := tree.InlineTrees()
	if !assert.Len(inlineTrees, 2) {
		return
	}

	assert.Equal(
		"(inline)",
		inlineTrees[0].RootNode().String(),
	)

	assert.Equal(
		"(inline (image (image_description) (link_destination) (link_title)))",
		inlineTrees[1].RootNode().String(),
	)
}

// TestIter checks that Iter visits every named block node in depth-first
// order and attaches the expected inline root node to each of them.
func TestIter(t *testing.T) {
	assert := assert.New(t)

	content := "# Hello\n- This two image: ![image](https://example.com/image.jpg \"a image\"), ![apple](https://example.com/apple.jpg \"a apple\")"
	tree, err := markdown.ParseCtx(context.Background(), nil, []byte(content))
	// Stop here on failure: tree is nil and Iter would panic.
	if !assert.NoError(err) {
		return
	}

	type BlockWithInline struct {
		Node       string
		InlineNode string
	}

	expected := []BlockWithInline{
		{"(document (section (atx_heading (atx_h1_marker) heading_content: (inline)) (list (list_item (list_marker_minus) (paragraph (inline))))))", ""},
		{"(section (atx_heading (atx_h1_marker) heading_content: (inline)) (list (list_item (list_marker_minus) (paragraph (inline)))))", ""},
		{"(atx_heading (atx_h1_marker) heading_content: (inline))", ""},
		{"(atx_h1_marker)", ""},
		{"(inline)", "(inline)"},
		{"(list (list_item (list_marker_minus) (paragraph (inline))))", ""},
		{"(list_item (list_marker_minus) (paragraph (inline)))", ""},
		{"(list_marker_minus)", ""},
		{"(paragraph (inline))", ""},
		{"(inline)", "(inline (image (image_description) (link_destination) (link_title)) (image (image_description) (link_destination) (link_title)))"},
	}

	visited := 0
	tree.Iter(func(node *markdown.Node) bool {
		// More nodes than expected: fail instead of indexing out of range.
		if visited >= len(expected) {
			assert.Fail("unexpected extra node", "idx: %d, node: %s", visited, node.String())
			return false
		}

		want := expected[visited]
		assert.Equal(want.Node, node.String(), "node mismatch. idx: %d", visited)

		switch {
		case node.Inline != nil:
			assert.Equal(want.InlineNode, node.Inline.String(), "inline node mismatch. idx: %d", visited)
		case want.InlineNode != "":
			// Calling String on the nil inline node would panic; report it.
			assert.Fail("missing inline node", "idx: %d, expected: %s", visited, want.InlineNode)
		}

		visited++
		return true
	})

	// A walk that stops early must not pass silently.
	assert.Equal(len(expected), visited, "visited node count")
}

// TestIterStop checks that Iter stops walking as soon as the callback returns
// false: the callback continues past "document" and "section" nodes only, so
// the walk must end right after the first other node.
func TestIterStop(t *testing.T) {
	assert := assert.New(t)

	content := "# Hello\n- This two image: ![image](https://example.com/image.jpg \"a image\"), ![apple](https://example.com/apple.jpg \"a apple\")"
	tree, err := markdown.ParseCtx(context.Background(), nil, []byte(content))
	// Stop here on failure: tree is nil and Iter would panic.
	if !assert.NoError(err) {
		return
	}

	expected := []string{
		"(document (section (atx_heading (atx_h1_marker) heading_content: (inline)) (list (list_item (list_marker_minus) (paragraph (inline))))))",
		"(section (atx_heading (atx_h1_marker) heading_content: (inline)) (list (list_item (list_marker_minus) (paragraph (inline)))))",
		"(atx_heading (atx_h1_marker) heading_content: (inline))",
	}

	collected := []string{}
	tree.Iter(func(node *markdown.Node) bool {
		collected = append(collected, node.String())

		return node.Type() == "document" || node.Type() == "section"
	})

	assert.Equal(expected, collected)
}
15 changes: 15 additions & 0 deletions markdown/tree-sitter-markdown-inline/binding.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package tree_sitter_markdown_inline

//#include "parser.h"
//TSLanguage *tree_sitter_markdown_inline();
import "C"
import (
"unsafe"

sitter "github.com/smacker/go-tree-sitter"
)

// GetLanguage returns the tree-sitter Language for the markdown inline
// grammar, wrapping the pointer returned by the C function
// tree_sitter_markdown_inline.
func GetLanguage() *sitter.Language {
	return sitter.NewLanguage(unsafe.Pointer(C.tree_sitter_markdown_inline()))
}
21 changes: 21 additions & 0 deletions markdown/tree-sitter-markdown-inline/bingding_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package tree_sitter_markdown_inline_test

import (
"context"
"testing"

sitter "github.com/smacker/go-tree-sitter"
tree_sitter_markdown_inline "github.com/smacker/go-tree-sitter/markdown/tree-sitter-markdown-inline"
"github.com/stretchr/testify/assert"
)

// TestGrammar parses a small document directly with the inline grammar and
// checks the resulting syntax tree.
func TestGrammar(t *testing.T) {
	assert := assert.New(t)

	src := []byte("# Hello world!\n- Here is a picture: ![picture](https://example.com/picture.png)")
	n, err := sitter.ParseCtx(context.Background(), src, tree_sitter_markdown_inline.GetLanguage())
	// Stop here on failure: n is nil and n.String() would panic.
	if !assert.NoError(err) {
		return
	}

	assert.Equal(
		"(inline (image (image_description) (link_destination)))",
		n.String(),
	)
}
Loading

0 comments on commit 0ac8d7d

Please sign in to comment.