// backend\analyze.go package main import ( "bytes" "context" "encoding/base64" "encoding/json" "fmt" "image" "image/draw" "image/jpeg" "math" "net/http" "os" "os/exec" "path/filepath" "sort" "strings" "time" "golang.org/x/image/webp" ) type analyzeVideoReq struct { JobID string `json:"jobId"` Output string `json:"output"` Mode string `json:"mode"` // "sprite" | "video" Goal string `json:"goal"` // "highlights" | "nsfw" } type analyzeHit struct { Time float64 `json:"time"` Label string `json:"label"` Score float64 `json:"score,omitempty"` Start float64 `json:"start,omitempty"` End float64 `json:"end,omitempty"` } type analyzeVideoResp struct { OK bool `json:"ok"` Mode string `json:"mode,omitempty"` Goal string `json:"goal,omitempty"` Hits []analyzeHit `json:"hits"` Segments []aiSegmentMeta `json:"segments,omitempty"` Error string `json:"error,omitempty"` } type spriteFrameCandidate struct { Index int Time float64 } const ( nsfwThresholdModerate = 0.35 nsfwThresholdStrong = 0.60 ) var autoSelectedAILabels = map[string]struct{}{ "anus_exposed": {}, "female_genitalia_exposed": {}, "male_genitalia_exposed": {}, "female_breast_exposed": {}, "buttocks_exposed": {}, } var nsfwIgnoredLabels = map[string]struct{}{ "face_female": {}, "face_male": {}, "belly_covered": {}, "armpits_covered": {}, "anus_covered": {}, } func shouldAutoSelectAnalyzeHit(label string) bool { label = strings.ToLower(strings.TrimSpace(label)) _, ok := autoSelectedAILabels[label] return ok } func isIgnoredNSFWLabel(label string) bool { label = strings.ToLower(strings.TrimSpace(label)) _, ok := nsfwIgnoredLabels[label] return ok } func extractSpriteFrames(spritePath string, ps previewSpriteMetaFileInfo) ([]image.Image, error) { f, err := os.Open(spritePath) if err != nil { return nil, err } defer f.Close() img, err := webp.Decode(f) if err != nil { return nil, err } b := img.Bounds() if ps.Cols <= 0 || ps.Rows <= 0 { return nil, fmt.Errorf("sprite cols/rows fehlen") } cellW := b.Dx() / ps.Cols cellH := b.Dy() / ps.Rows if cellW <= 0 || cellH <= 0 { return nil, fmt.Errorf("ungültige sprite cell size") } count := ps.Count if count <= 0 { count = ps.Cols * ps.Rows } out := make([]image.Image, 0, count) for i := 0; i < count; i++ { col := i % ps.Cols row := i / ps.Cols if row >= ps.Rows { break } srcRect := image.Rect( b.Min.X+col*cellW, b.Min.Y+row*cellH, b.Min.X+(col+1)*cellW, b.Min.Y+(row+1)*cellH, ) dst := image.NewRGBA(image.Rect(0, 0, cellW, cellH)) draw.Draw(dst, dst.Bounds(), img, srcRect.Min, draw.Src) out = append(out, dst) } return out, nil } func encodeImageJPEGBase64(img image.Image) (string, error) { var buf bytes.Buffer if err := jpeg.Encode(&buf, img, &jpeg.Options{Quality: 85}); err != nil { return "", err } return base64.StdEncoding.EncodeToString(buf.Bytes()), nil } func classifyFrameNSFW(ctx context.Context, img image.Image) (*NsfwImageResponse, error) { _ = ctx b64, err := encodeImageJPEGBase64(img) if err != nil { return nil, err } results, err := detectNSFWFromBase64(b64) if err != nil { return nil, err } return &NsfwImageResponse{ Ok: true, Results: results, }, nil } func nsfwLabelPriority(label string) int { label = strings.ToLower(strings.TrimSpace(label)) switch label { case "anus_exposed", "female_genitalia_exposed", "male_genitalia_exposed", "female_breast_exposed", "buttocks_exposed": return 300 case "female_genitalia_covered", "male_genitalia_covered", "female_breast_covered", "buttocks_covered", "male_breast_exposed", "male_breast_covered": return 200 case "belly_exposed", "armpits_exposed", "feet_exposed", "feet_covered": return 100 case "face_female", "face_male", "belly_covered", "armpits_covered", "anus_covered": return 10 default: return 0 } } func pickBestNSFWResult(results []NsfwFrameResult) (string, float64) { bestLabel := "" bestScore := 0.0 bestPriority := -1 for _, r := range results { label := strings.ToLower(strings.TrimSpace(r.Label)) if label == "" { continue } if isIgnoredNSFWLabel(label) { continue } score := r.Score priority := nsfwLabelPriority(label) if priority > bestPriority { bestLabel = label bestScore = score bestPriority = priority continue } if priority == bestPriority && score > bestScore { bestLabel = label bestScore = score bestPriority = priority } } return bestLabel, bestScore } func extractVideoFrameAt(ctx context.Context, outPath string, atSec float64) (image.Image, error) { tmp, err := os.CreateTemp("", "nsfw-frame-*.jpg") if err != nil { return nil, err } tmpPath := tmp.Name() _ = tmp.Close() defer os.Remove(tmpPath) ffmpegPath := strings.TrimSpace(getSettings().FFmpegPath) if ffmpegPath == "" { ffmpegPath = "ffmpeg" } cmd := exec.CommandContext( ctx, ffmpegPath, "-ss", fmt.Sprintf("%.3f", atSec), "-i", outPath, "-frames:v", "1", "-q:v", "2", "-y", tmpPath, ) if out, err := cmd.CombinedOutput(); err != nil { return nil, fmt.Errorf("ffmpeg fehlgeschlagen: %v: %s", err, strings.TrimSpace(string(out))) } f, err := os.Open(tmpPath) if err != nil { return nil, err } defer f.Close() img, _, err := image.Decode(f) if err != nil { return nil, err } return img, nil } func recordAnalyzeVideo(w http.ResponseWriter, r *http.Request) { if !mustMethod(w, r, http.MethodPost) { return } var req analyzeVideoReq if err := json.NewDecoder(r.Body).Decode(&req); err != nil { http.Error(w, "ungültiger body: "+err.Error(), http.StatusBadRequest) return } req.Mode = strings.ToLower(strings.TrimSpace(req.Mode)) req.Goal = strings.ToLower(strings.TrimSpace(req.Goal)) if req.Mode == "" { req.Mode = "sprite" } if req.Goal == "" { req.Goal = "highlights" } switch req.Mode { case "sprite", "video": default: http.Error(w, "mode muss 'sprite' oder 'video' sein", http.StatusBadRequest) return } switch req.Goal { case "highlights", "nsfw": default: http.Error(w, "goal muss 'highlights' oder 'nsfw' sein", http.StatusBadRequest) return } outPath := strings.TrimSpace(req.Output) if outPath == "" { http.Error(w, "output fehlt", http.StatusBadRequest) return } fi, err := os.Stat(outPath) if err != nil || fi == nil || fi.IsDir() || fi.Size() <= 0 { http.Error(w, "output datei nicht gefunden", http.StatusNotFound) return } ctx, cancel := context.WithTimeout(r.Context(), 45*time.Second) defer cancel() var hits []analyzeHit switch req.Mode { case "sprite": hits, err = analyzeVideoFromSprite(ctx, outPath, req.Goal) case "video": hits, err = analyzeVideoFromFrames(ctx, outPath, req.Goal) } if err != nil { respondJSON(w, analyzeVideoResp{ OK: false, Mode: req.Mode, Goal: req.Goal, Hits: []analyzeHit{}, Error: err.Error(), }) return } durationSec, _ := durationSecondsForAnalyze(ctx, outPath) segments := buildSegmentsFromAnalyzeHits(hits, durationSec) ai := &aiAnalysisMeta{ Goal: req.Goal, Mode: req.Mode, Hits: hits, Segments: segments, AnalyzedAtUnix: time.Now().Unix(), } if err := writeVideoAIForFile(ctx, outPath, "", ai); err != nil { fmt.Println("⚠️ writeVideoAIForFile:", err) } respondJSON(w, analyzeVideoResp{ OK: true, Mode: req.Mode, Goal: req.Goal, Hits: hits, Segments: segments, }) } func analyzeVideoFromSprite(ctx context.Context, outPath, goal string) ([]analyzeHit, error) { id := strings.TrimSpace(videoIDFromOutputPath(outPath)) if id == "" { return nil, fmt.Errorf("konnte keine video-id aus output ableiten") } metaPath, err := generatedMetaFile(id) if err != nil || strings.TrimSpace(metaPath) == "" { return nil, fmt.Errorf("meta.json nicht gefunden") } ps, ok := readPreviewSpriteMetaFromMetaFile(metaPath) if !ok { return nil, fmt.Errorf("previewSprite meta fehlt") } if ps.Count <= 0 { return nil, fmt.Errorf("previewSprite count fehlt") } spritePath := filepath.Join(filepath.Dir(metaPath), "preview-sprite.webp") if fi, err := os.Stat(spritePath); err != nil || fi == nil || fi.IsDir() || fi.Size() <= 0 { return nil, fmt.Errorf("preview-sprite.webp nicht gefunden") } durationSec, _ := durationSecondsForAnalyze(ctx, outPath) candidates := buildSpriteFrameCandidates(ps.Count, ps.StepSeconds, durationSec) if len(candidates) == 0 { return nil, fmt.Errorf("keine sprite-kandidaten vorhanden") } // ---------------------------------------------------------------- // HIER ist der Hook für echte AI/Vision-Analyse. // // Aktuell: // - erzeugen wir brauchbare Zeitpunkte aus den Preview-Frames // - gruppieren sie zu Treffern // // Später kannst du hier: // - spritePath + frame indices an ein Vision-Modell geben // - pro Frame Labels / Scores zurückbekommen // - daraus Trefferbereiche bilden // ---------------------------------------------------------------- frameHits, err := analyzeSpriteCandidatesWithAI(ctx, spritePath, ps, candidates, goal) if err != nil { return nil, err } return mergeAnalyzeHits(frameHits), nil } func nsfwThresholdForLabel(label string) float64 { label = strings.ToLower(strings.TrimSpace(label)) switch label { case "anus_exposed", "female_genitalia_exposed", "male_genitalia_exposed", "female_breast_exposed", "buttocks_exposed": return nsfwThresholdStrong case "female_breast_covered", "male_breast_exposed", "male_breast_covered", "buttocks_covered", "female_genitalia_covered", "male_genitalia_covered", "belly_exposed", "armpits_exposed", "feet_exposed", "feet_covered": return nsfwThresholdModerate default: return 0.50 } } func analyzeVideoFromFrames(ctx context.Context, outPath, goal string) ([]analyzeHit, error) { if goal != "nsfw" { return []analyzeHit{}, nil } durationSec, _ := durationSecondsForAnalyze(ctx, outPath) if durationSec <= 0 { return nil, fmt.Errorf("videolänge konnte nicht bestimmt werden") } sampleTimes := buildVideoSampleTimes(durationSec, 24) if len(sampleTimes) == 0 { return nil, fmt.Errorf("keine frame-samples berechnet") } hits := make([]analyzeHit, 0, len(sampleTimes)) for _, t := range sampleTimes { img, err := extractVideoFrameAt(ctx, outPath, t) if err != nil { return nil, fmt.Errorf("frame extraktion bei %.3fs fehlgeschlagen: %w", t, err) } res, err := classifyFrameNSFW(ctx, img) if err != nil { continue } bestLabel, bestScore := pickBestNSFWResult(res.Results) if bestLabel == "" { continue } threshold := nsfwThresholdForLabel(bestLabel) if bestScore < threshold { continue } hits = append(hits, analyzeHit{ Time: t, Label: bestLabel, Score: bestScore, Start: math.Max(0, t-4), End: t + 4, }) } return mergeAnalyzeHits(hits), nil } func analyzeSpriteCandidatesWithAI( ctx context.Context, spritePath string, ps previewSpriteMetaFileInfo, candidates []spriteFrameCandidate, goal string, ) ([]analyzeHit, error) { if goal != "nsfw" { return []analyzeHit{}, nil } frames, err := extractSpriteFrames(spritePath, ps) if err != nil { return nil, fmt.Errorf("sprite frames extrahieren fehlgeschlagen: %w", err) } hits := make([]analyzeHit, 0, len(candidates)) for _, c := range candidates { if c.Index < 0 || c.Index >= len(frames) { continue } res, err := classifyFrameNSFW(ctx, frames[c.Index]) if err != nil { continue } bestLabel, bestScore := pickBestNSFWResult(res.Results) if bestLabel == "" { continue } threshold := nsfwThresholdForLabel(bestLabel) if bestScore < threshold { continue } span := inferredSpanSeconds(ps.StepSeconds, 8) start := math.Max(0, c.Time-(span/2)) end := c.Time + (span / 2) hits = append(hits, analyzeHit{ Time: c.Time, Label: bestLabel, Score: bestScore, Start: start, End: end, }) } return hits, nil } func mergeAnalyzeHits(in []analyzeHit) []analyzeHit { if len(in) == 0 { return []analyzeHit{} } cp := make([]analyzeHit, 0, len(in)) for _, h := range in { label := strings.ToLower(strings.TrimSpace(h.Label)) if label == "" { continue } if isIgnoredNSFWLabel(label) { continue } start := h.Start end := h.End if start <= 0 && end <= 0 { start = h.Time end = h.Time } else { if start <= 0 { start = h.Time } if end <= 0 { end = h.Time } } h.Label = label h.Start = start h.End = end cp = append(cp, h) } if len(cp) == 0 { return []analyzeHit{} } sort.Slice(cp, func(i, j int) bool { if cp[i].Start != cp[j].Start { return cp[i].Start < cp[j].Start } if cp[i].End != cp[j].End { return cp[i].End < cp[j].End } return cp[i].Label < cp[j].Label }) out := make([]analyzeHit, 0, len(cp)) cur := cp[0] for i := 1; i < len(cp); i++ { n := cp[i] // Nur direkt aufeinanderfolgende Treffer mit gleichem Label zusammenfassen const mergeGapSeconds = 1.0 sameLabel := strings.EqualFold(cur.Label, n.Label) touchesOrNear := n.Start <= cur.End+mergeGapSeconds if sameLabel && touchesOrNear { if n.Start < cur.Start { cur.Start = n.Start } if n.End > cur.End { cur.End = n.End } if n.Score > cur.Score { cur.Score = n.Score } cur.Time = (cur.Start + cur.End) / 2 continue } out = append(out, cur) cur = n } out = append(out, cur) return out } func buildSegmentsFromAnalyzeHits(hits []analyzeHit, duration float64) []aiSegmentMeta { if len(hits) == 0 || duration <= 0 { return []aiSegmentMeta{} } out := make([]aiSegmentMeta, 0, len(hits)) for _, hit := range hits { if !shouldAutoSelectAnalyzeHit(hit.Label) { continue } start := hit.Start end := hit.End if start <= 0 && end <= 0 { start = hit.Time end = hit.Time } else { if start <= 0 { start = hit.Time } if end <= 0 { end = hit.Time } } if start > end { start, end = end, start } start = math.Max(0, math.Min(start, duration)) end = math.Max(0, math.Min(end, duration)) if end <= start { continue } out = append(out, aiSegmentMeta{ Label: strings.ToLower(strings.TrimSpace(hit.Label)), StartSeconds: start, EndSeconds: end, DurationSeconds: end - start, Score: hit.Score, AutoSelected: true, }) } if len(out) == 0 { return []aiSegmentMeta{} } sort.Slice(out, func(i, j int) bool { if out[i].StartSeconds != out[j].StartSeconds { return out[i].StartSeconds < out[j].StartSeconds } if out[i].EndSeconds != out[j].EndSeconds { return out[i].EndSeconds < out[j].EndSeconds } return out[i].Label < out[j].Label }) merged := make([]aiSegmentMeta, 0, len(out)) cur := out[0] for i := 1; i < len(out); i++ { n := out[i] const mergeGapSeconds = 15.0 sameLabel := strings.EqualFold(cur.Label, n.Label) nearEnough := n.StartSeconds <= cur.EndSeconds+mergeGapSeconds if sameLabel && nearEnough { if n.StartSeconds < cur.StartSeconds { cur.StartSeconds = n.StartSeconds } if n.EndSeconds > cur.EndSeconds { cur.EndSeconds = n.EndSeconds } cur.DurationSeconds = cur.EndSeconds - cur.StartSeconds if n.Score > cur.Score { cur.Score = n.Score } cur.AutoSelected = cur.AutoSelected || n.AutoSelected continue } merged = append(merged, cur) cur = n } merged = append(merged, cur) return merged } func buildSpriteFrameCandidates(count int, stepSeconds, durationSec float64) []spriteFrameCandidate { if count <= 0 { return nil } out := make([]spriteFrameCandidate, 0, count) stepLooksUsable := false if stepSeconds > 0 && durationSec > 0 { coverage := stepSeconds * math.Max(1, float64(count-1)) stepLooksUsable = coverage >= durationSec*0.7 && coverage <= durationSec*1.3 } for i := 0; i < count; i++ { var t float64 if stepLooksUsable { t = float64(i) * stepSeconds } else if durationSec > 0 && count > 1 { t = (float64(i) / float64(count-1)) * durationSec } else if stepSeconds > 0 { t = float64(i) * stepSeconds } else { t = float64(i) } out = append(out, spriteFrameCandidate{ Index: i, Time: t, }) } return out } func buildVideoSampleTimes(durationSec float64, sampleCount int) []float64 { if durationSec <= 0 || sampleCount <= 0 { return nil } if sampleCount == 1 { return []float64{0} } out := make([]float64, 0, sampleCount) for i := 0; i < sampleCount; i++ { ratio := float64(i) / float64(sampleCount-1) t := ratio * durationSec out = append(out, t) } return out } func inferredSpanSeconds(stepSeconds float64, fallback float64) float64 { if stepSeconds > 0 { return math.Max(2, stepSeconds*1.5) } return fallback } func durationSecondsForAnalyze(ctx context.Context, outPath string) (float64, error) { ctx2, cancel := context.WithTimeout(ctx, 8*time.Second) defer cancel() return durationSecondsCached(ctx2, outPath) } func videoIDFromOutputPath(outPath string) string { base := filepath.Base(strings.TrimSpace(outPath)) if base == "" { return "" } stem := strings.TrimSuffix(base, filepath.Ext(base)) stem = stripHotPrefix(stem) return strings.TrimSpace(stem) }