package tsdocs

import (
	"fmt"
	"net/url"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)

// Parser turns TypeScript documentation HTML into the package's structured
// types (Module, SearchResult, and the declaration types they reference).
type Parser struct {
	// baseURL is the origin that relative search-result links are resolved
	// against.
	baseURL string
}

// NewParser returns a Parser that resolves relative links against
// https://www.typescriptlang.org.
func NewParser() *Parser {
	return &Parser{
		baseURL: "https://www.typescriptlang.org",
	}
}

// ParseModulePage parses the HTML of a single documentation page and returns
// a Module describing the declarations found on it. docURL is recorded on the
// module and used as the prefix for per-declaration anchor links. FetchedAt is
// set to the time of the call.
//
// An error is returned only when the HTML cannot be parsed; in that case the
// returned module is nil.
func (p *Parser) ParseModulePage(html string, docURL string) (*Module, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, fmt.Errorf("parsing module page %q: %w", docURL, err)
	}

	module := &Module{
		DocURL:    docURL,
		FetchedAt: time.Now(),
	}
	module.Name = p.extractModuleName(doc)
	module.Doc = p.extractModuleDoc(doc)
	module.Interfaces = p.extractInterfaces(doc, module.Name, docURL)
	module.Types = p.extractTypeAliases(doc, module.Name, docURL)
	module.Functions = p.extractFunctions(doc, module.Name, docURL)
	module.Classes = p.extractClasses(doc, module.Name, docURL)
	module.Enums = p.extractEnums(doc, module.Name, docURL)
	module.Variables = p.extractVariables(doc, module.Name, docURL)
	return module, nil
}

// ParseSearchResults parses a search results page and returns one
// SearchResult per matching result element, in document order. The result
// name and URL come from the first link inside the element; relative URLs are
// resolved against the parser's base URL.
//
// An error is returned only when the HTML cannot be parsed.
func (p *Parser) ParseSearchResults(html string) ([]*SearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		return nil, fmt.Errorf("parsing search results: %w", err)
	}

	var results []*SearchResult
	doc.Find(".search-result, .ais-Hits-item, li.result").Each(func(_ int, s *goquery.Selection) {
		result := &SearchResult{}

		link := s.Find("a").First()
		result.Name = strings.TrimSpace(link.Text())
		if href, exists := link.Attr("href"); exists {
			result.DocURL = resolveURL(p.baseURL, href)
		}

		result.Doc = strings.TrimSpace(s.Find(".summary, p, .description").First().Text())
		result.Kind = searchResultKind(s)
		results = append(results, result)
	})
	return results, nil
}

// searchResultKind classifies a search result element as "interface", "type",
// "function", "class", or "doc".
//
// An explicit CSS class on the element always wins. Only when no class
// matches does the function fall back to looking for the keyword in the
// element text, so that a word that merely appears in a description cannot
// override the element's own marker.
func searchResultKind(s *goquery.Selection) string {
	kinds := [...]string{"interface", "type", "function", "class"}

	for _, kind := range kinds {
		if s.HasClass(kind) {
			return kind
		}
	}

	text := s.Text()
	for _, kind := range kinds {
		if strings.Contains(text, kind) {
			return kind
		}
	}
	return "doc"
}

// extractModuleName returns the trimmed text of the first h1, .title, or
// .page-title element, or "" when the page has none.
func (p *Parser) extractModuleName(doc *goquery.Document) string {
	title := doc.Find("h1, .title, .page-title").First().Text()
	return strings.TrimSpace(title)
}

// extractModuleDoc returns the trimmed text of the first paragraph matched in
// the page's main content area, or "" when there is none.
func (p *Parser) extractModuleDoc(doc *goquery.Document) string {
	docblock := doc.Find(".markdown p:first-of-type, .content p:first-of-type, #main p").First()
	return strings.TrimSpace(docblock.Text())
}

// extractInterfaces collects an Interface for every heading or API item whose
// text mentions "interface" (case-insensitively). The name is taken from the
// first code, .name, or link child (falling back to the element itself) with
// any generic parameter list removed. moduleName is currently unused.
func (p *Parser) extractInterfaces(doc *goquery.Document, moduleName string, docURL string) []*Interface {
	var interfaces []*Interface
	doc.Find("h2, h3, .context-item, .api-item").Each(func(_ int, s *goquery.Selection) {
		if !strings.Contains(strings.ToLower(s.Text()), "interface") {
			return
		}

		iface := &Interface{
			Name:   baseTypeName(nameSelection(s, "code, .name, a").Text()),
			DocURL: anchorURL(s, docURL),
			Doc:    firstParagraphAfter(s, "h2, h3"),
		}
		// A bare "interface" means only the keyword was found, not a name.
		if iface.Name != "" && iface.Name != "interface" {
			interfaces = append(interfaces, iface)
		}
	})
	return interfaces
}

// extractTypeAliases collects a TypeAlias for every heading or API item whose
// text mentions "type" (case-insensitively). The name is taken from the first
// code or .name child (falling back to the element itself) with any generic
// parameter list removed. moduleName is currently unused.
func (p *Parser) extractTypeAliases(doc *goquery.Document, moduleName string, docURL string) []*TypeAlias {
	var types []*TypeAlias
	doc.Find("h2, h3, .context-item, .api-item").Each(func(_ int, s *goquery.Selection) {
		if !strings.Contains(strings.ToLower(s.Text()), "type") {
			return
		}

		ta := &TypeAlias{
			Name:   baseTypeName(nameSelection(s, "code, .name").Text()),
			DocURL: anchorURL(s, docURL),
		}
		// A bare "type" means only the keyword was found, not a name.
		if ta.Name != "" && ta.Name != "type" {
			types = append(types, ta)
		}
	})
	return types
}

// extractFunctions collects a Function for every heading, API item, or code
// block whose text contains a "(" preceded by at least one word. The word
// immediately before the first "(" becomes the name and the element's whole
// trimmed text becomes the signature. moduleName is currently unused.
func (p *Parser) extractFunctions(doc *goquery.Document, moduleName string, docURL string) []*Function {
	var functions []*Function
	doc.Find("h2, h3, .context-item, .api-item, pre code").Each(func(_ int, s *goquery.Selection) {
		text := s.Text()

		// Without a "(" that has something in front of it there is no name
		// to extract, so the element cannot yield a function.
		idx := strings.Index(text, "(")
		if idx <= 0 {
			return
		}
		parts := strings.Fields(text[:idx])
		if len(parts) == 0 {
			return
		}

		// A <code> inside a <pre> has no paragraph siblings of its own; the
		// describing paragraph follows the enclosing <pre>, so walk from
		// there instead.
		start := s
		if pre := s.Closest("pre"); s.Is("code") && pre.Length() > 0 {
			start = pre
		}

		functions = append(functions, &Function{
			Name:      parts[len(parts)-1],
			Signature: strings.TrimSpace(text),
			DocURL:    anchorURL(s, docURL),
			Doc:       firstParagraphAfter(start, "h2, h3, pre"),
		})
	})
	return functions
}

// extractClasses collects a Class for every heading or API item whose text
// mentions "class" (case-insensitively). The name is taken from the first
// code or .name child (falling back to the element itself) with any generic
// parameter list removed. moduleName is currently unused.
func (p *Parser) extractClasses(doc *goquery.Document, moduleName string, docURL string) []*Class {
	var classes []*Class
	doc.Find("h2, h3, .context-item, .api-item").Each(func(_ int, s *goquery.Selection) {
		if !strings.Contains(strings.ToLower(s.Text()), "class") {
			return
		}

		class := &Class{
			Name:   baseTypeName(nameSelection(s, "code, .name").Text()),
			DocURL: anchorURL(s, docURL),
		}
		// A bare "class" means only the keyword was found, not a name.
		if class.Name != "" && class.Name != "class" {
			classes = append(classes, class)
		}
	})
	return classes
}

// extractEnums collects an Enum for every heading or context item whose text
// mentions "enum" (case-insensitively). The name is the trimmed text of the
// first code or .name child, falling back to the element itself. moduleName
// is currently unused.
func (p *Parser) extractEnums(doc *goquery.Document, moduleName string, docURL string) []*Enum {
	var enums []*Enum
	doc.Find("h2, h3, .context-item").Each(func(_ int, s *goquery.Selection) {
		if !strings.Contains(strings.ToLower(s.Text()), "enum") {
			return
		}

		enum := &Enum{
			Name:   strings.TrimSpace(nameSelection(s, "code, .name").Text()),
			DocURL: anchorURL(s, docURL),
		}
		// A bare "enum" means only the keyword was found, not a name.
		if enum.Name != "" && enum.Name != "enum" {
			enums = append(enums, enum)
		}
	})
	return enums
}

// extractVariables collects at most one Variable per code block: the first
// const, let, or var declaration that starts a line of the block. moduleName
// and docURL are currently unused.
func (p *Parser) extractVariables(doc *goquery.Document, moduleName string, docURL string) []*Variable {
	var variables []*Variable
	doc.Find("pre code").Each(func(_ int, s *goquery.Selection) {
		if name := declaredVariableName(s.Text()); name != "" {
			variables = append(variables, &Variable{Name: name})
		}
	})
	return variables
}

// declaredVariableName returns the binding introduced by the first line of
// code that begins with a const, let, or var declaration (optionally preceded
// by export and/or declare). It returns "" when no such line exists.
//
// The initializer, trailing semicolon, and any type annotation are stripped,
// so "export const x: number = 1;" yields "x". Destructuring patterns are
// returned as written because ":" inside them is a rename, not a type.
func declaredVariableName(code string) string {
	for _, line := range strings.Split(code, "\n") {
		fields := strings.Fields(line)
		for len(fields) > 0 && (fields[0] == "export" || fields[0] == "declare") {
			fields = fields[1:]
		}
		if len(fields) < 2 {
			continue
		}

		switch fields[0] {
		case "const", "let", "var":
		default:
			continue
		}
		// "const enum X" declares an enum, not a variable.
		if fields[0] == "const" && fields[1] == "enum" {
			continue
		}

		decl := strings.Join(fields[1:], " ")
		if idx := strings.IndexAny(decl, "=;"); idx >= 0 {
			decl = decl[:idx]
		}
		decl = strings.TrimSpace(decl)

		if !strings.HasPrefix(decl, "{") && !strings.HasPrefix(decl, "[") {
			if idx := strings.Index(decl, ":"); idx >= 0 {
				decl = strings.TrimSpace(decl[:idx])
			}
		}
		if decl != "" {
			return decl
		}
	}
	return ""
}

// nameSelection returns the first descendant of s that matches selector, or s
// itself when nothing matches.
func nameSelection(s *goquery.Selection, selector string) *goquery.Selection {
	if el := s.Find(selector).First(); el.Length() > 0 {
		return el
	}
	return s
}

// baseTypeName trims surrounding whitespace from raw and drops everything
// from the first "<" onward, so "Foo<T>" becomes "Foo".
func baseTypeName(raw string) string {
	name := strings.TrimSpace(raw)
	if idx := strings.Index(name, "<"); idx >= 0 {
		name = name[:idx]
	}
	return strings.TrimSpace(name)
}

// anchorURL returns docURL with "#" and the id attribute of s appended, or ""
// when s has no id attribute.
func anchorURL(s *goquery.Selection, docURL string) string {
	if id, exists := s.Attr("id"); exists {
		return docURL + "#" + id
	}
	return ""
}

// firstParagraphAfter returns the trimmed text of the first non-empty <p>
// among the siblings that follow start. The walk ends at the first sibling
// matching the stop selector; "" is returned when no paragraph is found
// before that point.
func firstParagraphAfter(start *goquery.Selection, stop string) string {
	for next := start.Next(); next.Length() > 0 && !next.Is(stop); next = next.Next() {
		if !next.Is("p") {
			continue
		}
		if text := strings.TrimSpace(next.Text()); text != "" {
			return text
		}
	}
	return ""
}

// resolveURL returns href as an absolute URL, resolving it against base when
// it is relative. href is returned unchanged when it already starts with
// http:// or https://, or when either argument fails to parse.
func resolveURL(base string, href string) string {
	// Match the full scheme prefix: a bare "http" test would also catch
	// relative paths such as "httpclient.html" and leave them unresolved.
	if strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://") {
		return href
	}

	baseURL, err := url.Parse(base)
	if err != nil {
		return href
	}
	hrefURL, err := url.Parse(href)
	if err != nil {
		return href
	}
	return baseURL.ResolveReference(hrefURL).String()
}