mirror of
https://github.com/Dvorinka/Devour.git
synced 2026-06-03 20:13:03 +00:00
first commit
This commit is contained in:
@@ -0,0 +1,307 @@
|
||||
package tsdocs
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// Parser turns TypeScript documentation HTML into the structured types of
// this package.
type Parser struct {
	// baseURL is the origin that relative links found in search results are
	// resolved against.
	baseURL string
}
|
||||
|
||||
func NewParser() *Parser {
|
||||
return &Parser{
|
||||
baseURL: "https://www.typescriptlang.org",
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Parser) ParseModulePage(html string, docURL string) (*Module, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
module := &Module{
|
||||
DocURL: docURL,
|
||||
FetchedAt: time.Now(),
|
||||
}
|
||||
|
||||
module.Name = p.extractModuleName(doc)
|
||||
module.Doc = p.extractModuleDoc(doc)
|
||||
module.Interfaces = p.extractInterfaces(doc, module.Name, docURL)
|
||||
module.Types = p.extractTypeAliases(doc, module.Name, docURL)
|
||||
module.Functions = p.extractFunctions(doc, module.Name, docURL)
|
||||
module.Classes = p.extractClasses(doc, module.Name, docURL)
|
||||
module.Enums = p.extractEnums(doc, module.Name, docURL)
|
||||
module.Variables = p.extractVariables(doc, module.Name, docURL)
|
||||
|
||||
return module, nil
|
||||
}
|
||||
|
||||
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var results []*SearchResult
|
||||
|
||||
doc.Find(".search-result, .ais-Hits-item, li.result").Each(func(i int, s *goquery.Selection) {
|
||||
result := &SearchResult{}
|
||||
|
||||
link := s.Find("a").First()
|
||||
result.Name = strings.TrimSpace(link.Text())
|
||||
|
||||
if href, exists := link.Attr("href"); exists {
|
||||
result.DocURL = resolveURL(p.baseURL, href)
|
||||
}
|
||||
|
||||
result.Doc = strings.TrimSpace(s.Find(".summary, p, .description").First().Text())
|
||||
|
||||
if s.HasClass("interface") || strings.Contains(s.Text(), "interface") {
|
||||
result.Kind = "interface"
|
||||
} else if s.HasClass("type") || strings.Contains(s.Text(), "type") {
|
||||
result.Kind = "type"
|
||||
} else if s.HasClass("function") || strings.Contains(s.Text(), "function") {
|
||||
result.Kind = "function"
|
||||
} else if s.HasClass("class") || strings.Contains(s.Text(), "class") {
|
||||
result.Kind = "class"
|
||||
} else {
|
||||
result.Kind = "doc"
|
||||
}
|
||||
|
||||
results = append(results, result)
|
||||
})
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (p *Parser) extractModuleName(doc *goquery.Document) string {
|
||||
title := doc.Find("h1, .title, .page-title").First().Text()
|
||||
return strings.TrimSpace(title)
|
||||
}
|
||||
|
||||
func (p *Parser) extractModuleDoc(doc *goquery.Document) string {
|
||||
docblock := doc.Find(".markdown p:first-of-type, .content p:first-of-type, #main p").First()
|
||||
return strings.TrimSpace(docblock.Text())
|
||||
}
|
||||
|
||||
func (p *Parser) extractInterfaces(doc *goquery.Document, moduleName string, docURL string) []*Interface {
|
||||
var interfaces []*Interface
|
||||
|
||||
doc.Find("h2, h3, .context-item, .api-item").Each(func(_ int, s *goquery.Selection) {
|
||||
text := s.Text()
|
||||
if !strings.Contains(strings.ToLower(text), "interface") {
|
||||
return
|
||||
}
|
||||
|
||||
iface := &Interface{}
|
||||
|
||||
nameEl := s.Find("code, .name, a").First()
|
||||
if nameEl.Length() == 0 {
|
||||
nameEl = s
|
||||
}
|
||||
iface.Name = strings.TrimSpace(nameEl.Text())
|
||||
iface.Name = strings.TrimSuffix(iface.Name, "<")
|
||||
iface.Name = strings.Split(iface.Name, "<")[0]
|
||||
iface.Name = strings.TrimSpace(iface.Name)
|
||||
|
||||
if id, exists := s.Attr("id"); exists {
|
||||
iface.DocURL = docURL + "#" + id
|
||||
}
|
||||
|
||||
next := s.Next()
|
||||
for next.Length() > 0 && !next.Is("h2, h3") {
|
||||
if next.Is("p") && iface.Doc == "" {
|
||||
iface.Doc = strings.TrimSpace(next.Text())
|
||||
}
|
||||
next = next.Next()
|
||||
}
|
||||
|
||||
if iface.Name != "" && iface.Name != "interface" {
|
||||
interfaces = append(interfaces, iface)
|
||||
}
|
||||
})
|
||||
|
||||
return interfaces
|
||||
}
|
||||
|
||||
func (p *Parser) extractTypeAliases(doc *goquery.Document, moduleName string, docURL string) []*TypeAlias {
|
||||
var types []*TypeAlias
|
||||
|
||||
doc.Find("h2, h3, .context-item, .api-item").Each(func(_ int, s *goquery.Selection) {
|
||||
text := s.Text()
|
||||
if !strings.Contains(strings.ToLower(text), "type") {
|
||||
return
|
||||
}
|
||||
|
||||
ta := &TypeAlias{}
|
||||
|
||||
nameEl := s.Find("code, .name").First()
|
||||
if nameEl.Length() == 0 {
|
||||
nameEl = s
|
||||
}
|
||||
ta.Name = strings.TrimSpace(nameEl.Text())
|
||||
ta.Name = strings.TrimSuffix(ta.Name, "<")
|
||||
ta.Name = strings.Split(ta.Name, "<")[0]
|
||||
ta.Name = strings.TrimSpace(ta.Name)
|
||||
|
||||
if id, exists := s.Attr("id"); exists {
|
||||
ta.DocURL = docURL + "#" + id
|
||||
}
|
||||
|
||||
if ta.Name != "" && ta.Name != "type" {
|
||||
types = append(types, ta)
|
||||
}
|
||||
})
|
||||
|
||||
return types
|
||||
}
|
||||
|
||||
func (p *Parser) extractFunctions(doc *goquery.Document, moduleName string, docURL string) []*Function {
|
||||
var functions []*Function
|
||||
|
||||
doc.Find("h2, h3, .context-item, .api-item, pre code").Each(func(_ int, s *goquery.Selection) {
|
||||
text := s.Text()
|
||||
if !strings.Contains(text, "function") && !strings.Contains(text, "(") {
|
||||
return
|
||||
}
|
||||
|
||||
fn := &Function{}
|
||||
|
||||
sigText := text
|
||||
if idx := strings.Index(sigText, "("); idx > 0 {
|
||||
prefix := sigText[:idx]
|
||||
parts := strings.Fields(prefix)
|
||||
if len(parts) > 0 {
|
||||
fn.Name = parts[len(parts)-1]
|
||||
}
|
||||
fn.Signature = strings.TrimSpace(sigText)
|
||||
}
|
||||
|
||||
if id, exists := s.Attr("id"); exists {
|
||||
fn.DocURL = docURL + "#" + id
|
||||
}
|
||||
|
||||
next := s.Next()
|
||||
for next.Length() > 0 && !next.Is("h2, h3, pre") {
|
||||
if next.Is("p") && fn.Doc == "" {
|
||||
fn.Doc = strings.TrimSpace(next.Text())
|
||||
}
|
||||
next = next.Next()
|
||||
}
|
||||
|
||||
if fn.Name != "" {
|
||||
functions = append(functions, fn)
|
||||
}
|
||||
})
|
||||
|
||||
return functions
|
||||
}
|
||||
|
||||
func (p *Parser) extractClasses(doc *goquery.Document, moduleName string, docURL string) []*Class {
|
||||
var classes []*Class
|
||||
|
||||
doc.Find("h2, h3, .context-item, .api-item").Each(func(_ int, s *goquery.Selection) {
|
||||
text := s.Text()
|
||||
if !strings.Contains(strings.ToLower(text), "class") {
|
||||
return
|
||||
}
|
||||
|
||||
class := &Class{}
|
||||
|
||||
nameEl := s.Find("code, .name").First()
|
||||
if nameEl.Length() == 0 {
|
||||
nameEl = s
|
||||
}
|
||||
class.Name = strings.TrimSpace(nameEl.Text())
|
||||
class.Name = strings.TrimSuffix(class.Name, "<")
|
||||
class.Name = strings.Split(class.Name, "<")[0]
|
||||
class.Name = strings.TrimSpace(class.Name)
|
||||
|
||||
if id, exists := s.Attr("id"); exists {
|
||||
class.DocURL = docURL + "#" + id
|
||||
}
|
||||
|
||||
if class.Name != "" && class.Name != "class" {
|
||||
classes = append(classes, class)
|
||||
}
|
||||
})
|
||||
|
||||
return classes
|
||||
}
|
||||
|
||||
func (p *Parser) extractEnums(doc *goquery.Document, moduleName string, docURL string) []*Enum {
|
||||
var enums []*Enum
|
||||
|
||||
doc.Find("h2, h3, .context-item").Each(func(_ int, s *goquery.Selection) {
|
||||
text := s.Text()
|
||||
if !strings.Contains(strings.ToLower(text), "enum") {
|
||||
return
|
||||
}
|
||||
|
||||
enum := &Enum{}
|
||||
|
||||
nameEl := s.Find("code, .name").First()
|
||||
if nameEl.Length() == 0 {
|
||||
nameEl = s
|
||||
}
|
||||
enum.Name = strings.TrimSpace(nameEl.Text())
|
||||
|
||||
if id, exists := s.Attr("id"); exists {
|
||||
enum.DocURL = docURL + "#" + id
|
||||
}
|
||||
|
||||
if enum.Name != "" && enum.Name != "enum" {
|
||||
enums = append(enums, enum)
|
||||
}
|
||||
})
|
||||
|
||||
return enums
|
||||
}
|
||||
|
||||
func (p *Parser) extractVariables(doc *goquery.Document, moduleName string, docURL string) []*Variable {
|
||||
var variables []*Variable
|
||||
|
||||
doc.Find("pre code").Each(func(_ int, s *goquery.Selection) {
|
||||
text := s.Text()
|
||||
if strings.Contains(text, "const ") || strings.Contains(text, "let ") || strings.Contains(text, "var ") {
|
||||
v := &Variable{}
|
||||
|
||||
if idx := strings.Index(text, "="); idx > 0 {
|
||||
decl := text[:idx]
|
||||
decl = strings.TrimPrefix(decl, "const")
|
||||
decl = strings.TrimPrefix(decl, "let")
|
||||
decl = strings.TrimPrefix(decl, "var")
|
||||
v.Name = strings.TrimSpace(decl)
|
||||
}
|
||||
|
||||
if v.Name != "" {
|
||||
variables = append(variables, v)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
return variables
|
||||
}
|
||||
|
||||
// resolveURL resolves href against base and returns the resulting URL string.
//
// An href that already carries a scheme (for example "https://...") is
// returned unchanged. A relative or protocol-relative href is resolved against
// base following the usual URL reference rules. If either base or href cannot
// be parsed, href is returned as-is.
func resolveURL(base string, href string) string {
	ref, err := url.Parse(href)
	if err != nil {
		return href
	}

	// Decide on the parsed scheme rather than a textual "http" prefix, which
	// would wrongly treat relative paths such as "httpd.html" as absolute.
	if ref.IsAbs() {
		return href
	}

	baseURL, err := url.Parse(base)
	if err != nil {
		return href
	}

	return baseURL.ResolveReference(ref).String()
}
|
||||
@@ -0,0 +1,97 @@
|
||||
package tsdocs
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// testModulePageHTML is a small documentation page used by the parser tests.
// It contains a title, two interface headings with descriptions, a method
// heading and a code block with a generic function. Angle brackets of generic
// parameters are written as character references so that the HTML parser
// keeps them as text instead of treating "<T>" as an element.
const testModulePageHTML = `
<!DOCTYPE html>
<html>
<body>
<h1>TypeScript Handbook</h1>
<p class="content">TypeScript is a strongly typed programming language that builds on JavaScript.</p>

<h2 id="string">interface String</h2>
<p>Allows manipulation and formatting of text strings.</p>

<h3 id="concat">concat(...strings: string[]): string</h3>
<p>Returns a string that contains the concatenation of two or more strings.</p>

<h2 id="Array">interface Array&lt;T&gt;</h2>
<p>An array of values.</p>

<pre><code>function identity&lt;T&gt;(arg: T): T {
return arg;
}</code></pre>
</body>
</html>
`
|
||||
|
||||
func TestParseModulePage(t *testing.T) {
|
||||
parser := NewParser()
|
||||
module, err := parser.ParseModulePage(testModulePageHTML, "https://www.typescriptlang.org/docs/handbook/")
|
||||
if err != nil {
|
||||
t.Fatalf("ParseModulePage failed: %v", err)
|
||||
}
|
||||
|
||||
if module.Name == "" {
|
||||
t.Error("Expected non-empty module name")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractInterfaces(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(testModulePageHTML))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse HTML: %v", err)
|
||||
}
|
||||
|
||||
interfaces := parser.extractInterfaces(doc, "typescript", "https://www.typescriptlang.org/docs/test")
|
||||
|
||||
for _, iface := range interfaces {
|
||||
if iface.Name == "interface" {
|
||||
t.Error("Should not include 'interface' as a name")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractFunctions(t *testing.T) {
|
||||
parser := NewParser()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(testModulePageHTML))
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse HTML: %v", err)
|
||||
}
|
||||
|
||||
functions := parser.extractFunctions(doc, "typescript", "https://www.typescriptlang.org/docs/test")
|
||||
|
||||
for _, fn := range functions {
|
||||
if fn.Name != "" && fn.Signature != "" {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveURL(t *testing.T) {
|
||||
tests := []struct {
|
||||
base string
|
||||
href string
|
||||
expected string
|
||||
}{
|
||||
{"https://www.typescriptlang.org", "/docs/handbook", "https://www.typescriptlang.org/docs/handbook"},
|
||||
{"https://www.typescriptlang.org", "https://example.com/page", "https://example.com/page"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.href, func(t *testing.T) {
|
||||
got := resolveURL(tt.base, tt.href)
|
||||
if got != tt.expected {
|
||||
t.Errorf("resolveURL(%q, %q) = %q, want %q", tt.base, tt.href, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,177 @@
|
||||
// Package tsdocs provides parsing and extraction for TypeScript documentation
|
||||
// from typescriptlang.org.
|
||||
package tsdocs
|
||||
|
||||
import "time"
|
||||
|
||||
// Module represents a TypeScript module's documentation.
|
||||
type Module struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Version string `json:"version,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Interfaces []*Interface `json:"interfaces,omitempty"`
|
||||
Types []*TypeAlias `json:"types,omitempty"`
|
||||
Functions []*Function `json:"functions,omitempty"`
|
||||
Classes []*Class `json:"classes,omitempty"`
|
||||
Variables []*Variable `json:"variables,omitempty"`
|
||||
Enums []*Enum `json:"enums,omitempty"`
|
||||
Namespaces []*Namespace `json:"namespaces,omitempty"`
|
||||
FetchedAt time.Time `json:"fetched_at"`
|
||||
}
|
||||
|
||||
// Interface represents a TypeScript interface.
|
||||
type Interface struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Modifiers []string `json:"modifiers,omitempty"`
|
||||
Extends []string `json:"extends,omitempty"`
|
||||
Properties []*Property `json:"properties,omitempty"`
|
||||
Methods []*Method `json:"methods,omitempty"`
|
||||
CallSignatures []*Signature `json:"call_signatures,omitempty"`
|
||||
IndexSignatures []*Signature `json:"index_signatures,omitempty"`
|
||||
TypeParams []*TypeParam `json:"type_params,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Deprecated string `json:"deprecated,omitempty"`
|
||||
}
|
||||
|
||||
// TypeAlias represents a TypeScript type alias.
|
||||
type TypeAlias struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Type string `json:"type"`
|
||||
TypeParams []*TypeParam `json:"type_params,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Deprecated string `json:"deprecated,omitempty"`
|
||||
}
|
||||
|
||||
// Class represents a TypeScript class.
|
||||
type Class struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Modifiers []string `json:"modifiers,omitempty"`
|
||||
Extends string `json:"extends,omitempty"`
|
||||
Implements []string `json:"implements,omitempty"`
|
||||
Properties []*Property `json:"properties,omitempty"`
|
||||
Methods []*Method `json:"methods,omitempty"`
|
||||
Constructors []*Method `json:"constructors,omitempty"`
|
||||
TypeParams []*TypeParam `json:"type_params,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Deprecated string `json:"deprecated,omitempty"`
|
||||
}
|
||||
|
||||
// Property represents a class or interface property. The Optional and
// ReadOnly flags are always written to JSON, including when false.
type Property struct {
	Name       string   `json:"name"`
	Type       string   `json:"type"`
	Doc        string   `json:"doc,omitempty"`
	Modifiers  []string `json:"modifiers,omitempty"`
	Optional   bool     `json:"optional"`
	ReadOnly   bool     `json:"read_only"`
	Default    string   `json:"default,omitempty"`
	DocURL     string   `json:"doc_url"`
	Deprecated string   `json:"deprecated,omitempty"`
}
|
||||
|
||||
// Method represents a class or interface method.
|
||||
type Method struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Modifiers []string `json:"modifiers,omitempty"`
|
||||
Parameters []*Parameter `json:"parameters,omitempty"`
|
||||
ReturnType string `json:"return_type,omitempty"`
|
||||
TypeParams []*TypeParam `json:"type_params,omitempty"`
|
||||
Signature string `json:"signature"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Deprecated string `json:"deprecated,omitempty"`
|
||||
IsStatic bool `json:"is_static"`
|
||||
IsAbstract bool `json:"is_abstract"`
|
||||
}
|
||||
|
||||
// Function represents a standalone function.
|
||||
type Function struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Signature string `json:"signature,omitempty"`
|
||||
Parameters []*Parameter `json:"parameters,omitempty"`
|
||||
ReturnType string `json:"return_type,omitempty"`
|
||||
TypeParams []*TypeParam `json:"type_params,omitempty"`
|
||||
Signatures []*Signature `json:"signatures,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Deprecated string `json:"deprecated,omitempty"`
|
||||
}
|
||||
|
||||
// Signature represents a function/method signature.
|
||||
type Signature struct {
|
||||
Parameters []*Parameter `json:"parameters,omitempty"`
|
||||
ReturnType string `json:"return_type,omitempty"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
}
|
||||
|
||||
// Parameter represents a function/method parameter. The Optional and Rest
// flags are always written to JSON, including when false.
type Parameter struct {
	Name     string `json:"name"`
	Type     string `json:"type"`
	Doc      string `json:"doc,omitempty"`
	Optional bool   `json:"optional"`
	Default  string `json:"default,omitempty"`
	Rest     bool   `json:"rest"`
}
|
||||
|
||||
// TypeParam represents a type parameter (generic). Only Name is always
// written to JSON; the other fields are omitted when empty.
type TypeParam struct {
	Name       string `json:"name"`
	Constraint string `json:"constraint,omitempty"`
	Default    string `json:"default,omitempty"`
	Doc        string `json:"doc,omitempty"`
}
|
||||
|
||||
// Variable represents a const/let/var declaration. Name and DocURL are always
// written to JSON; the other fields are omitted when empty.
type Variable struct {
	Name       string   `json:"name"`
	Type       string   `json:"type,omitempty"`
	Value      string   `json:"value,omitempty"`
	Doc        string   `json:"doc,omitempty"`
	Modifiers  []string `json:"modifiers,omitempty"`
	DocURL     string   `json:"doc_url"`
	Deprecated string   `json:"deprecated,omitempty"`
}
|
||||
|
||||
// Enum represents a TypeScript enum.
|
||||
type Enum struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Modifiers []string `json:"modifiers,omitempty"`
|
||||
Members []*EnumMember `json:"members,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
Deprecated string `json:"deprecated,omitempty"`
|
||||
}
|
||||
|
||||
// EnumMember represents an enum member. Value and Doc are omitted from the
// JSON encoding when empty.
type EnumMember struct {
	Name  string `json:"name"`
	Value string `json:"value,omitempty"`
	Doc   string `json:"doc,omitempty"`
}
|
||||
|
||||
// Namespace represents a TypeScript namespace.
|
||||
type Namespace struct {
|
||||
Name string `json:"name"`
|
||||
Doc string `json:"doc,omitempty"`
|
||||
Interfaces []*Interface `json:"interfaces,omitempty"`
|
||||
Types []*TypeAlias `json:"types,omitempty"`
|
||||
Functions []*Function `json:"functions,omitempty"`
|
||||
Classes []*Class `json:"classes,omitempty"`
|
||||
Variables []*Variable `json:"variables,omitempty"`
|
||||
DocURL string `json:"doc_url"`
|
||||
}
|
||||
|
||||
// SearchResult represents a search result: the name and documentation link of
// a hit, a short description, and a Kind classifying what the hit refers to.
// Score is always written to JSON, including when zero.
type SearchResult struct {
	Name   string `json:"name"`
	Kind   string `json:"kind"` // e.g. interface, type, function, class; the search parser falls back to "doc"
	Module string `json:"module,omitempty"`
	Doc    string `json:"doc,omitempty"`
	DocURL string `json:"doc_url"`
	Score  int    `json:"score"`
}
|
||||
Reference in New Issue
Block a user