Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature:add yuque reader #150

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions community/document-readers/yuque-reader/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the yuque-reader community document-reader module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Inherits from the repository root POM, three directories up. -->
    <parent>
        <groupId>com.alibaba.cloud.ai</groupId>
        <artifactId>spring-ai-alibaba</artifactId>
        <version>${revision}</version>
        <relativePath>../../../pom.xml</relativePath>
    </parent>

    <artifactId>yuque-reader</artifactId>
    <name>yuque-reader</name>
    <description>yuque reader for Spring AI Alibaba</description>
    <packaging>jar</packaging>
    <url>https://github.com/alibaba/spring-ai-alibaba</url>
    <scm>
        <url>https://github.com/alibaba/spring-ai-alibaba</url>
        <connection>git://github.com/alibaba/spring-ai-alibaba.git</connection>
        <!-- SSH form of the repository address, used for write access. -->
        <developerConnection>git@github.com:alibaba/spring-ai-alibaba.git</developerConnection>
    </scm>

    <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven-deploy-plugin.version>3.1.1</maven-deploy-plugin.version>
    </properties>

    <!--
        Dependencies below that declare no <version> must have their versions supplied by
        inherited dependency management.
        NOTE(review): YuQueResource imports com.alibaba.fastjson, which is not declared here;
        presumably it arrives transitively via spring-ai-alibaba-core. Confirm, or declare it
        explicitly.
    -->
    <dependencies>

        <dependency>
            <groupId>com.alibaba.cloud.ai</groupId>
            <artifactId>spring-ai-alibaba-core</artifactId>
            <version>${project.parent.version}</version>
        </dependency>

        <dependency>
            <groupId>org.springframework.ai</groupId>
            <artifactId>spring-ai-tika-document-reader</artifactId>
        </dependency>

        <!-- test dependencies -->
        <dependency>
            <groupId>org.springframework.ai</groupId>
            <artifactId>spring-ai-test</artifactId>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>io.projectreactor</groupId>
            <artifactId>reactor-test</artifactId>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>io.micrometer</groupId>
            <artifactId>micrometer-observation-test</artifactId>
            <scope>test</scope>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
            <!-- skip=true excludes this module's artifact from the deploy phase. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-deploy-plugin</artifactId>
                <version>${maven-deploy-plugin.version}</version>
                <configuration>
                    <skip>true</skip>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <!-- Spring milestone repository; snapshot resolution is disabled for it. -->
    <repositories>
        <repository>
            <id>spring-milestones</id>
            <name>Spring Milestones</name>
            <url>https://repo.spring.io/milestone</url>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package com.alibaba.cloud.ai.reader.yuque;

import com.alibaba.cloud.ai.reader.DocumentParser;
import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;
import org.springframework.ai.reader.ExtractedTextFormatter;

import java.util.ArrayList;
import java.util.List;

/**
 * {@link DocumentReader} that delegates parsing of a {@link YuQueResource} to another
 * reader and then tags every produced {@link Document} with the resource path it came from.
 *
 * @author YunLong
 */
public class YuQueDocumentReader implements DocumentReader {

    /** Delegate that turns the resource content into documents. */
    private final DocumentReader parser;

    /** Resource whose path is recorded as the source of each document. */
    private final YuQueResource yuQueResource;

    /**
     * Creates a reader whose delegate is obtained from {@code parserType.getParser(yuQueResource)}.
     *
     * @param yuQueResource the resource to read
     * @param parserType factory for the delegate reader
     */
    public YuQueDocumentReader(YuQueResource yuQueResource, DocumentParser parserType) {
        this(yuQueResource, parserType.getParser(yuQueResource));
    }

    /**
     * Creates a reader whose delegate is obtained from
     * {@code parserType.getParser(yuQueResource, formatter)}.
     *
     * @param yuQueResource the resource to read
     * @param parserType factory for the delegate reader
     * @param formatter text formatter handed to the parser factory
     */
    public YuQueDocumentReader(YuQueResource yuQueResource, DocumentParser parserType,
            ExtractedTextFormatter formatter) {
        this(yuQueResource, parserType.getParser(yuQueResource, formatter));
    }

    /**
     * Creates a reader around an already constructed delegate.
     *
     * @param yuQueResource the resource whose path is recorded on each document
     * @param parser the delegate reader that produces the documents
     * @throws NullPointerException if either argument is {@code null}
     */
    public YuQueDocumentReader(YuQueResource yuQueResource, DocumentReader parser) {
        // Fail at construction time instead of on the first get() call.
        this.yuQueResource = java.util.Objects.requireNonNull(yuQueResource, "yuQueResource must not be null");
        this.parser = java.util.Objects.requireNonNull(parser, "parser must not be null");
    }

    /**
     * Reads the documents from the delegate and stores the resource path under the
     * {@link YuQueResource#SOURCE} metadata key of each one.
     *
     * @return the list returned by the delegate, with the source metadata added
     */
    @Override
    public List<Document> get() {
        List<Document> documents = parser.get();
        String source = yuQueResource.getResourcePath();

        for (Document doc : documents) {
            doc.getMetadata().put(YuQueResource.SOURCE, source);
        }

        return documents;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
package com.alibaba.cloud.ai.reader.yuque;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.springframework.core.io.Resource;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Spring {@link Resource} backed by the HTML body of a single YuQue document.
 *
 * <p>The constructor validates the resource path, checks the token against the YuQue
 * API and downloads the document's {@code body_html}. The downloaded bytes are kept in
 * memory, so {@link #getInputStream()} can be called any number of times.
 *
 * @author YunLong
 */
public class YuQueResource implements Resource {

    private static final String BASE_URL = "https://www.yuque.com";

    private static final String INFO_PATH = "/api/v2/hello";

    private static final String DOC_DETAIL_PATH = "/api/v2/repos/%s/%s/docs/%s";

    /** Metadata key under which readers record the resource path. */
    public static final String SOURCE = "source";

    /** The only value of the response's {@code type} field that is accepted. */
    public static final String SUPPORT_TYPE = "Doc";

    private static final String AUTH_HEADER = "X-Auth-Token";

    /** Matches {@code https://host/group/book/doc} and captures the three path segments. */
    private static final Pattern RESOURCE_PATH_PATTERN = Pattern
        .compile("^https://[a-zA-Z0-9.-]+/([a-zA-Z0-9.-]+)/([a-zA-Z0-9.-]+)/([a-zA-Z0-9.-]+)$");

    private final HttpClient httpClient;

    /** UTF-8 bytes of the document's {@code body_html}. */
    private final byte[] content;

    private final URI uri;

    private final String resourcePath;

    private final String groupLogin;

    private final String bookSlug;

    private final String id;

    /**
     * Validates the inputs and eagerly downloads the document.
     *
     * @param yuQueToken user or team token sent in the {@code X-Auth-Token} header
     * @param resourcePath document address of the form {@code https://host/group/book/doc}
     * @throws IllegalArgumentException if an argument is missing, the path does not match
     * the expected format, or the token check does not return HTTP 200
     * @throws RuntimeException if a request fails or the response cannot be used
     */
    public YuQueResource(String yuQueToken, String resourcePath) {
        Assert.hasText(yuQueToken, "YuQueToken must not be empty");
        Assert.notNull(resourcePath, "ResourcePath must not be null");

        this.resourcePath = resourcePath;
        this.httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_2).build();

        Matcher matcher = judgePathRule(resourcePath);
        this.groupLogin = matcher.group(1);
        this.bookSlug = matcher.group(2);
        this.id = matcher.group(3);

        judgeToken(yuQueToken);

        this.content = fetchDocumentHtml(yuQueToken);
        this.uri = URI.create(resourcePath);
    }

    /**
     * Checks the resource path against the expected format.
     * Official online doc https://www.yuque.com/yuque/developer/openapi
     *
     * @param resourcePath the path to check
     * @return a matcher that has matched, exposing group login, book slug and doc id as groups 1-3
     * @throws IllegalArgumentException if the path does not match
     */
    private static Matcher judgePathRule(String resourcePath) {
        Matcher matcher = RESOURCE_PATH_PATTERN.matcher(resourcePath);
        // Each capture group requires at least one character, so a match implies non-empty segments.
        Assert.isTrue(matcher.matches(), "Invalid resource path");
        return matcher;
    }

    /**
     * Verifies the token by calling the API's hello endpoint.
     *
     * @param yuQueToken User/Team token
     * @throws IllegalArgumentException if the endpoint does not answer with HTTP 200
     */
    private void judgeToken(String yuQueToken) {
        HttpResponse<String> response = send(yuQueToken, URI.create(BASE_URL + INFO_PATH));
        Assert.isTrue(response.statusCode() == 200, "Failed to auth YuQueToken");
    }

    /**
     * Downloads the document detail and extracts its HTML body.
     *
     * @param yuQueToken User/Team token
     * @return the {@code body_html} field encoded as UTF-8
     * @throws RuntimeException if the status is not 200, the payload has no {@code data}
     * object, the type is not {@link #SUPPORT_TYPE}, or {@code body_html} is absent
     */
    private byte[] fetchDocumentHtml(String yuQueToken) {
        URI docUri = URI.create(BASE_URL + DOC_DETAIL_PATH.formatted(groupLogin, bookSlug, id));
        HttpResponse<String> response = send(yuQueToken, docUri);

        if (response.statusCode() != 200) {
            throw new RuntimeException("Failed to fetch YuQue document, HTTP status " + response.statusCode());
        }

        // Parse the JSON response using FastJSON
        JSONObject jsonObject = JSON.parseObject(response.body());
        JSONObject dataObject = (jsonObject != null) ? jsonObject.getJSONObject("data") : null;

        if (dataObject == null) {
            throw new RuntimeException("Invalid response format: 'data' is not an object");
        }

        if (!Objects.equals(dataObject.getString("type"), SUPPORT_TYPE)) {
            throw new RuntimeException("Unsupported resource type, only support " + SUPPORT_TYPE);
        }

        String bodyHtml = dataObject.getString("body_html");
        if (bodyHtml == null) {
            throw new RuntimeException("Invalid response format: 'body_html' is missing");
        }

        // Explicit charset: the no-arg getBytes() depends on the platform default before Java 18.
        return bodyHtml.getBytes(StandardCharsets.UTF_8);
    }

    /**
     * Sends an authenticated GET request and returns the response with a string body.
     *
     * @param yuQueToken User/Team token
     * @param target the address to request
     * @return the HTTP response
     * @throws RuntimeException if the request fails or the thread is interrupted
     */
    private HttpResponse<String> send(String yuQueToken, URI target) {
        HttpRequest request = HttpRequest.newBuilder().header(AUTH_HEADER, yuQueToken).uri(target).GET().build();
        try {
            return this.httpClient.send(request, HttpResponse.BodyHandlers.ofString());
        }
        catch (IOException e) {
            throw new RuntimeException("Failed to call YuQue API: " + target, e);
        }
        catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException("Interrupted while calling YuQue API: " + target, e);
        }
    }

    /**
     * @return a new, empty builder
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Fluent builder for {@link YuQueResource}.
     */
    public static class Builder {

        private String yuQueToken;

        private String resourcePath;

        public Builder yuQueToken(String yuQueToken) {
            this.yuQueToken = yuQueToken;
            return this;
        }

        public Builder resourcePath(String resourcePath) {
            this.resourcePath = resourcePath;
            return this;
        }

        /**
         * Builds the resource; this performs the network calls made by the constructor.
         *
         * @return the loaded resource
         * @throws IllegalArgumentException if the token or the path was not set
         */
        public YuQueResource build() {
            Assert.notNull(yuQueToken, "YuQueToken must not be null");
            Assert.notNull(resourcePath, "ResourcePath must not be null");
            return new YuQueResource(yuQueToken, resourcePath);
        }

    }

    /**
     * @return the resource path exactly as passed to the constructor
     */
    public String getResourcePath() {
        return resourcePath;
    }

    /**
     * @return always {@code true}: construction only succeeds once the content is loaded
     */
    @Override
    public boolean exists() {
        return true;
    }

    @Override
    public URL getURL() throws IOException {
        return uri.toURL();
    }

    @Override
    public URI getURI() throws IOException {
        return uri;
    }

    /**
     * @throws FileNotFoundException always, because the content is held in memory
     */
    @Override
    public File getFile() throws IOException {
        throw new FileNotFoundException(getDescription() + " cannot be resolved to absolute file path");
    }

    /**
     * @return the number of bytes in the downloaded HTML body
     */
    @Override
    public long contentLength() throws IOException {
        return content.length;
    }

    @Override
    public long lastModified() throws IOException {
        return 0;
    }

    /**
     * @throws FileNotFoundException always, because relative resources are not supported
     */
    @Override
    public Resource createRelative(String relativePath) throws IOException {
        throw new FileNotFoundException("Cannot create a relative resource for " + getDescription());
    }

    @Override
    public String getFilename() {
        return "";
    }

    @Override
    public String getDescription() {
        return "YuQue resource [" + resourcePath + "]";
    }

    /**
     * @return a fresh stream over the downloaded HTML body on every call
     */
    @Override
    public InputStream getInputStream() throws IOException {
        return new ByteArrayInputStream(content);
    }

}
Loading