package services import ( "bytes" "encoding/base64" "fmt" "image" "regexp" "strings" "time" "github.com/trackeep/backend/models" "gorm.io/gorm" ) // ComputerVisionService provides computer vision capabilities type ComputerVisionService struct { db *gorm.DB } // NewComputerVisionService creates a new computer vision service func NewComputerVisionService(db *gorm.DB) *ComputerVisionService { return &ComputerVisionService{db: db} } // ImageAnalysisRequest represents a request for image analysis type ImageAnalysisRequest struct { ImageData string `json:"image_data" binding:"required"` // Base64 encoded image AnalysisType string `json:"analysis_type" binding:"required"` // ocr, objects, text, faces, all FileID *uint `json:"file_id,omitempty"` } // ImageAnalysisResponse represents the result of image analysis type ImageAnalysisResponse struct { Success bool `json:"success"` Analysis map[string]interface{} `json:"analysis"` Text string `json:"text,omitempty"` Objects []ObjectDetection `json:"objects,omitempty"` Faces []FaceDetection `json:"faces,omitempty"` Metadata ImageMetadata `json:"metadata"` } // ObjectDetection represents a detected object type ObjectDetection struct { Name string `json:"name"` Confidence float64 `json:"confidence"` BoundingBox BoundingBox `json:"bounding_box"` } // FaceDetection represents a detected face type FaceDetection struct { Confidence float64 `json:"confidence"` BoundingBox BoundingBox `json:"bounding_box"` Age *int `json:"age,omitempty"` Gender *string `json:"gender,omitempty"` Emotion *string `json:"emotion,omitempty"` } // BoundingBox represents coordinates of a detected object type BoundingBox struct { X int `json:"x"` Y int `json:"y"` Width int `json:"width"` Height int `json:"height"` } // ImageMetadata represents metadata about the analyzed image type ImageMetadata struct { Width int `json:"width"` Height int `json:"height"` Format string `json:"format"` SizeBytes int `json:"size_bytes"` ColorSpace string `json:"color_space"` DominantColors []string `json:"dominant_colors"` TextDensity float64 `json:"text_density"` } // AnalyzeImage performs computer vision analysis on an image func (s *ComputerVisionService) AnalyzeImage(req ImageAnalysisRequest) (*ImageAnalysisResponse, error) { // Decode base64 image imageData, err := base64.StdEncoding.DecodeString(req.ImageData) if err != nil { return nil, fmt.Errorf("invalid base64 image data: %v", err) } // Parse image to get metadata img, format, err := image.Decode(bytes.NewReader(imageData)) if err != nil { return nil, fmt.Errorf("failed to decode image: %v", err) } bounds := img.Bounds() response := &ImageAnalysisResponse{ Success: true, Analysis: make(map[string]interface{}), Metadata: ImageMetadata{ Width: bounds.Dx(), Height: bounds.Dy(), Format: format, SizeBytes: len(imageData), ColorSpace: "RGB", // Simplified }, } // Perform requested analysis types if req.AnalysisType == "ocr" || req.AnalysisType == "all" { text, err := s.extractText(imageData) if err == nil { response.Text = text response.Analysis["text"] = text response.Analysis["word_count"] = len(strings.Fields(text)) response.Metadata.TextDensity = float64(len(text)) / float64(bounds.Dx()*bounds.Dy()) * 1000 } } if req.AnalysisType == "objects" || req.AnalysisType == "all" { objects := s.detectObjects(imageData) response.Objects = objects response.Analysis["objects"] = objects response.Analysis["object_count"] = len(objects) } if req.AnalysisType == "faces" || req.AnalysisType == "all" { faces := s.detectFaces(imageData) response.Faces = faces response.Analysis["faces"] = faces response.Analysis["face_count"] = len(faces) } if req.AnalysisType == "text" || req.AnalysisType == "all" { // Extract readable text from image text, err := s.extractText(imageData) if err == nil { response.Analysis["readable_text"] = text response.Analysis["has_text"] = len(strings.TrimSpace(text)) > 0 } } // Extract dominant colors colors := s.extractDominantColors(imageData) response.Metadata.DominantColors = colors // Save analysis to database if file ID is provided if req.FileID != nil { s.saveImageAnalysis(*req.FileID, response) } return response, nil } // extractText performs OCR on the image (simplified implementation) func (s *ComputerVisionService) extractText(imageData []byte) (string, error) { // This is a simplified OCR implementation // In a real implementation, you would use: // - Tesseract OCR // - Google Cloud Vision API // - Azure Computer Vision // - AWS Textract // For demo purposes, we'll extract text from common patterns // This is just a placeholder implementation // Try to detect common text patterns in the image // In reality, this would require actual OCR processing // Simulate OCR by returning sample text based on image analysis text := ` This is sample OCR text extracted from the image. In a real implementation, this would contain the actual text content found in the image using OCR technology. Common use cases: - Document scanning - Receipt processing - Business card reading - Screenshot text extraction ` return strings.TrimSpace(text), nil } // detectObjects performs object detection on the image func (s *ComputerVisionService) detectObjects(imageData []byte) []ObjectDetection { // This is a simplified object detection implementation // In a real implementation, you would use: // - YOLO (You Only Look Once) // - TensorFlow Object Detection API // - OpenCV DNN // - Cloud vision services // Simulate object detection with common objects objects := []ObjectDetection{ { Name: "document", Confidence: 0.95, BoundingBox: BoundingBox{X: 10, Y: 10, Width: 300, Height: 400}, }, { Name: "text", Confidence: 0.88, BoundingBox: BoundingBox{X: 20, Y: 30, Width: 280, Height: 200}, }, { Name: "logo", Confidence: 0.72, BoundingBox: BoundingBox{X: 250, Y: 20, Width: 50, Height: 50}, }, } return objects } // detectFaces performs face detection on the image func (s *ComputerVisionService) detectFaces(imageData []byte) []FaceDetection { // This is a simplified face detection implementation // In a real implementation, you would use: // - OpenCV Face Detection // - Dlib // - FaceNet // - Cloud face detection services // Simulate face detection faces := []FaceDetection{ { Confidence: 0.92, BoundingBox: BoundingBox{X: 100, Y: 80, Width: 120, Height: 150}, Age: func() *int { age := 28; return &age }(), Gender: func() *string { gender := "male"; return &gender }(), Emotion: func() *string { emotion := "happy"; return &emotion }(), }, } return faces } // extractDominantColors extracts the dominant colors from the image func (s *ComputerVisionService) extractDominantColors(imageData []byte) []string { // This is a simplified color extraction // In a real implementation, you would use: // - K-means clustering // - Color histogram analysis // - Median cut algorithm // Simulate dominant colors colors := []string{ "#FFFFFF", // White "#333333", // Dark gray "#0066CC", // Blue "#FF6600", // Orange "#00CC66", // Green } return colors } // saveImageAnalysis saves the analysis results to the database func (s *ComputerVisionService) saveImageAnalysis(fileID uint, analysis *ImageAnalysisResponse) error { // Convert analysis to JSON for storage analysisJSON := fmt.Sprintf(`{ "text": "%s", "object_count": %d, "face_count": %d, "metadata": %+v }`, analysis.Text, len(analysis.Objects), len(analysis.Faces), analysis.Metadata) // Create or update file analysis record var fileAnalysis models.FileAnalysis err := s.db.Where("file_id = ?", fileID).First(&fileAnalysis).Error if err == gorm.ErrRecordNotFound { // Create new analysis record now := time.Now() fileAnalysis = models.FileAnalysis{ FileID: fileID, AnalysisType: "computer_vision", Results: analysisJSON, Confidence: 0.85, ProcessedAt: &now, } return s.db.Create(&fileAnalysis).Error } else if err == nil { // Update existing record fileAnalysis.Results = analysisJSON now := time.Now() fileAnalysis.ProcessedAt = &now return s.db.Save(&fileAnalysis).Error } return err } // ProcessDocumentImage processes a document image for text extraction and structure func (s *ComputerVisionService) ProcessDocumentImage(imageData []byte) (*DocumentAnalysis, error) { // Extract text using OCR text, err := s.extractText(imageData) if err != nil { return nil, err } // Analyze document structure analysis := &DocumentAnalysis{ Text: text, WordCount: len(strings.Fields(text)), LineCount: len(strings.Split(text, "\n")), Language: s.detectLanguage(text), DocumentType: s.detectDocumentType(text), Sections: s.extractSections(text), Tables: s.extractTables(text), Links: s.extractLinks(text), Emails: s.extractEmails(text), PhoneNumbers: s.extractPhoneNumbers(text), } return analysis, nil } // DocumentAnalysis represents the analysis of a document image type DocumentAnalysis struct { Text string `json:"text"` WordCount int `json:"word_count"` LineCount int `json:"line_count"` Language string `json:"language"` DocumentType string `json:"document_type"` Sections []DocumentSection `json:"sections"` Tables []DocumentTable `json:"tables"` Links []string `json:"links"` Emails []string `json:"emails"` PhoneNumbers []string `json:"phone_numbers"` } // DocumentSection represents a section in a document type DocumentSection struct { Title string `json:"title"` Content string `json:"content"` Level int `json:"level"` } // DocumentTable represents a table in a document type DocumentTable struct { Headers []string `json:"headers"` Rows [][]string `json:"rows"` } // detectLanguage detects the language of the text func (s *ComputerVisionService) detectLanguage(text string) string { // Simplified language detection // In a real implementation, you would use: // - Language detection libraries // - Machine learning models // - Cloud language detection services if strings.Contains(strings.ToLower(text), "the") && strings.Contains(strings.ToLower(text), "and") { return "en" } else if strings.Contains(text, "est") && strings.Contains(text, "que") { return "es" } else if strings.Contains(text, "und") && strings.Contains(text, "der") { return "de" } return "unknown" } // detectDocumentType detects the type of document func (s *ComputerVisionService) detectDocumentType(text string) string { text = strings.ToLower(text) if strings.Contains(text, "invoice") || strings.Contains(text, "bill") { return "invoice" } else if strings.Contains(text, "receipt") || strings.Contains(text, "purchase") { return "receipt" } else if strings.Contains(text, "resume") || strings.Contains(text, "curriculum") { return "resume" } else if strings.Contains(text, "contract") || strings.Contains(text, "agreement") { return "contract" } else if strings.Contains(text, "report") || strings.Contains(text, "analysis") { return "report" } return "general" } // extractSections extracts document sections func (s *ComputerVisionService) extractSections(text string) []DocumentSection { var sections []DocumentSection lines := strings.Split(text, "\n") for _, line := range lines { line = strings.TrimSpace(line) if line == "" { continue } // Simple section detection (headers followed by content) if len(line) < 100 && (strings.HasSuffix(line, ":") || strings.ToUpper(line) == line) { sections = append(sections, DocumentSection{ Title: line, Content: "", Level: 1, }) } } return sections } // extractTables extracts tables from the text func (s *ComputerVisionService) extractTables(text string) []DocumentTable { // Simplified table extraction // In a real implementation, this would be much more sophisticated var tables []DocumentTable // Look for tabular data patterns lines := strings.Split(text, "\n") for i, line := range lines { if strings.Contains(line, "\t") || strings.Contains(line, " ") { // Potential table row if i > 0 && strings.Contains(lines[i-1], "\t") { // Multiple consecutive rows with tabs - likely a table table := DocumentTable{ Headers: strings.Split(lines[i-1], "\t"), Rows: [][]string{strings.Split(line, "\t")}, } tables = append(tables, table) } } } return tables } // extractLinks extracts URLs from the text func (s *ComputerVisionService) extractLinks(text string) []string { urlRegex := regexp.MustCompile(`https?://[^\s]+`) return urlRegex.FindAllString(text, -1) } // extractEmails extracts email addresses from the text func (s *ComputerVisionService) extractEmails(text string) []string { emailRegex := regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`) return emailRegex.FindAllString(text, -1) } // extractPhoneNumbers extracts phone numbers from the text func (s *ComputerVisionService) extractPhoneNumbers(text string) []string { phoneRegex := regexp.MustCompile(`\b\d{3}[-.]?\d{3}[-.]?\d{4}\b`) return phoneRegex.FindAllString(text, -1) } // CreateFileAnalysis creates a file analysis record func (s *ComputerVisionService) CreateFileAnalysis(fileID uint, analysisType, results string, confidence float64) error { now := time.Now() fileAnalysis := models.FileAnalysis{ FileID: fileID, AnalysisType: analysisType, Results: results, Confidence: confidence, ProcessedAt: &now, } return s.db.Create(&fileAnalysis).Error }