nsfwapp/backend/analyze.go
2026-03-16 12:46:38 +01:00

816 lines
17 KiB
Go

// backend\analyze.go
package main
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"image"
"image/draw"
"image/jpeg"
"math"
"net/http"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"time"
)
// analyzeVideoReq is the JSON request body decoded by recordAnalyzeVideo.
type analyzeVideoReq struct {
// JobID is decoded from "jobId"; the handler visible in this file does not read it.
JobID string `json:"jobId"`
// Output is the path of the video file to analyze (required, must exist).
Output string `json:"output"`
// Mode selects the frame source; an empty value defaults to "sprite".
Mode string `json:"mode"` // "sprite" | "video"
// Goal selects the kind of analysis; an empty value defaults to "highlights".
Goal string `json:"goal"` // "highlights" | "nsfw"
}
// analyzeHit is one detection: a label and score at a point in time, with an
// optional [Start, End] window around it. All times are in seconds from the
// beginning of the video.
type analyzeHit struct {
// Time is the timestamp of the analyzed frame (or the window midpoint after merging).
Time float64 `json:"time"`
// Label is the detected class name.
Label string `json:"label"`
// Score is the classifier score for Label; omitted from JSON when zero.
Score float64 `json:"score,omitempty"`
// Start and End delimit the window the hit covers; omitted from JSON when zero.
Start float64 `json:"start,omitempty"`
End float64 `json:"end,omitempty"`
}
// analyzeVideoResp is the JSON response written by recordAnalyzeVideo.
type analyzeVideoResp struct {
// OK is false when the analysis failed; Error then carries the message.
OK bool `json:"ok"`
// Mode and Goal echo the normalized request values.
Mode string `json:"mode,omitempty"`
Goal string `json:"goal,omitempty"`
// Hits is always emitted (no omitempty).
Hits []analyzeHit `json:"hits"`
// Segments are the segments built from Hits by buildSegmentsFromAnalyzeHits.
Segments []aiSegmentMeta `json:"segments,omitempty"`
Error string `json:"error,omitempty"`
}
// spriteFrameCandidate pairs a frame of the preview sprite with the video
// timestamp assigned to it by buildSpriteFrameCandidates.
type spriteFrameCandidate struct {
// Index is the zero-based frame index inside the sprite sheet.
Index int
// Time is the assigned timestamp in seconds.
Time float64
}
// Minimum classifier scores a label must reach to count as a hit; which
// threshold applies to which label is decided by nsfwThresholdForLabel.
const (
// nsfwThresholdModerate applies to covered / less explicit labels.
nsfwThresholdModerate = 0.35
// nsfwThresholdStrong applies to the explicit "*_exposed" labels.
nsfwThresholdStrong = 0.60
)
// autoSelectedAILabels is the set of (lower-case) labels for which
// shouldAutoSelectAnalyzeHit reports true. buildSegmentsFromAnalyzeHits only
// turns hits carrying one of these labels into segments.
var autoSelectedAILabels = map[string]struct{}{
"anus_exposed": {},
"female_genitalia_exposed": {},
"male_genitalia_exposed": {},
"female_breast_exposed": {},
"buttocks_exposed": {},
}
// nsfwIgnoredLabels is the set of (lower-case) labels that isIgnoredNSFWLabel
// reports as ignored; such labels are skipped when picking the best result of
// a frame and when merging hits.
var nsfwIgnoredLabels = map[string]struct{}{
"face_female": {},
"face_male": {},
"belly_covered": {},
"armpits_covered": {},
"anus_covered": {},
}
// shouldAutoSelectAnalyzeHit reports whether label, compared after trimming
// surrounding whitespace and lower-casing, is contained in
// autoSelectedAILabels.
func shouldAutoSelectAnalyzeHit(label string) bool {
	key := strings.ToLower(strings.TrimSpace(label))
	if _, selected := autoSelectedAILabels[key]; selected {
		return true
	}
	return false
}
// isIgnoredNSFWLabel reports whether label, compared after trimming
// surrounding whitespace and lower-casing, is contained in nsfwIgnoredLabels.
func isIgnoredNSFWLabel(label string) bool {
	key := strings.ToLower(strings.TrimSpace(label))
	if _, ignored := nsfwIgnoredLabels[key]; ignored {
		return true
	}
	return false
}
// extractSpriteFrames decodes the sprite sheet at spritePath and cuts it into
// individual frames.
//
// The sheet is treated as a grid of ps.Cols x ps.Rows equally sized cells that
// is filled row by row. At most ps.Count frames are returned; when ps.Count is
// not positive every grid cell is returned. Cells that would lie past the last
// row are never produced. Each frame is copied into its own RGBA image whose
// bounds start at (0,0).
//
// An error is returned when the file cannot be opened or decoded, when
// cols/rows are missing, or when the computed cell size is not positive.
func extractSpriteFrames(spritePath string, ps previewSpriteMetaFileInfo) ([]image.Image, error) {
	file, err := os.Open(spritePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	sheet, _, err := image.Decode(file)
	if err != nil {
		return nil, err
	}

	if ps.Cols <= 0 || ps.Rows <= 0 {
		return nil, fmt.Errorf("sprite cols/rows fehlen")
	}

	bounds := sheet.Bounds()
	cellW, cellH := bounds.Dx()/ps.Cols, bounds.Dy()/ps.Rows
	if cellW <= 0 || cellH <= 0 {
		return nil, fmt.Errorf("ungültige sprite cell size")
	}

	total := ps.Count
	if total <= 0 {
		total = ps.Cols * ps.Rows
	}

	frames := make([]image.Image, 0, total)
	for idx := 0; idx < total; idx++ {
		row, col := idx/ps.Cols, idx%ps.Cols
		if row >= ps.Rows {
			// The requested count exceeds what the grid can hold.
			break
		}

		// Top-left corner of this cell inside the sheet.
		origin := image.Pt(bounds.Min.X+col*cellW, bounds.Min.Y+row*cellH)

		cell := image.NewRGBA(image.Rect(0, 0, cellW, cellH))
		draw.Draw(cell, cell.Bounds(), sheet, origin, draw.Src)
		frames = append(frames, cell)
	}
	return frames, nil
}
// encodeImageJPEGBase64 encodes img as a JPEG (quality 85) and returns the
// bytes as a standard base64 string. The encoder's error is returned unchanged.
func encodeImageJPEGBase64(img image.Image) (string, error) {
	opts := jpeg.Options{Quality: 85}
	encoded := new(bytes.Buffer)
	if err := jpeg.Encode(encoded, img, &opts); err != nil {
		return "", err
	}
	b64 := base64.StdEncoding.EncodeToString(encoded.Bytes())
	return b64, nil
}
// classifyFrameNSFW runs the NSFW detector on a single frame.
//
// The frame is JPEG/base64 encoded and handed to detectNSFWFromBase64; on
// success the detector results are wrapped in a response with Ok set to true.
// Encoding and detection errors are returned unchanged.
func classifyFrameNSFW(ctx context.Context, img image.Image) (*NsfwImageResponse, error) {
	// ctx is accepted for API symmetry but not used: the detector call takes
	// no context, so cancellation does not interrupt it.
	_ = ctx

	encoded, encErr := encodeImageJPEGBase64(img)
	if encErr != nil {
		return nil, encErr
	}

	detections, detErr := detectNSFWFromBase64(encoded)
	if detErr != nil {
		return nil, detErr
	}

	resp := &NsfwImageResponse{Ok: true}
	resp.Results = detections
	return resp, nil
}
// nsfwLabelPriority ranks a label by severity; a larger value means the label
// is preferred when several labels are detected in the same frame. The label
// is compared after trimming whitespace and lower-casing. Unknown labels
// rank 0.
func nsfwLabelPriority(label string) int {
	switch strings.ToLower(strings.TrimSpace(label)) {
	// Explicit exposure.
	case "anus_exposed", "female_genitalia_exposed", "male_genitalia_exposed",
		"female_breast_exposed", "buttocks_exposed":
		return 300

	// Covered intimate areas and male breast.
	case "female_genitalia_covered", "male_genitalia_covered", "female_breast_covered",
		"buttocks_covered", "male_breast_exposed", "male_breast_covered":
		return 200

	// Belly, armpits, feet.
	case "belly_exposed", "armpits_exposed", "feet_exposed", "feet_covered":
		return 100

	// Faces and harmless covered areas.
	case "face_female", "face_male", "belly_covered", "armpits_covered", "anus_covered":
		return 10
	}
	return 0
}
// pickBestNSFWResult selects the most relevant detection of one frame and
// returns its normalized (trimmed, lower-case) label together with its score.
//
// Results with an empty or ignored label are skipped. Among the rest the label
// with the highest nsfwLabelPriority wins regardless of score; the score only
// breaks ties between labels of equal priority, and the earlier result wins
// when the score is equal too. When nothing qualifies, ("", 0) is returned.
func pickBestNSFWResult(results []NsfwFrameResult) (string, float64) {
	var (
		winner      string
		winnerScore float64
	)
	winnerRank := -1 // below every real priority, so the first candidate always wins

	for i := range results {
		name := strings.ToLower(strings.TrimSpace(results[i].Label))
		if name == "" || isIgnoredNSFWLabel(name) {
			continue
		}

		rank := nsfwLabelPriority(name)
		score := results[i].Score

		better := rank > winnerRank || (rank == winnerRank && score > winnerScore)
		if !better {
			continue
		}
		winner, winnerScore, winnerRank = name, score, rank
	}
	return winner, winnerScore
}
// extractVideoFrameAt grabs a single frame of the video at outPath, at
// position atSec (seconds), and returns it decoded.
//
// ffmpeg writes the frame as JPEG into a temporary file that is removed again
// before the function returns. The ffmpeg binary comes from the settings
// (FFmpegPath) and falls back to "ffmpeg". The process is bound to ctx. When
// ffmpeg fails, the error carries its combined output.
func extractVideoFrameAt(ctx context.Context, outPath string, atSec float64) (image.Image, error) {
	// Reserve a unique file name; ffmpeg overwrites the file (-y).
	scratch, err := os.CreateTemp("", "nsfw-frame-*.jpg")
	if err != nil {
		return nil, err
	}
	framePath := scratch.Name()
	_ = scratch.Close()
	defer os.Remove(framePath)

	bin := strings.TrimSpace(getSettings().FFmpegPath)
	if bin == "" {
		bin = "ffmpeg"
	}

	args := []string{
		"-ss", fmt.Sprintf("%.3f", atSec),
		"-i", outPath,
		"-frames:v", "1",
		"-q:v", "2",
		"-y",
		framePath,
	}
	output, runErr := exec.CommandContext(ctx, bin, args...).CombinedOutput()
	if runErr != nil {
		return nil, fmt.Errorf("ffmpeg fehlgeschlagen: %v: %s", runErr, strings.TrimSpace(string(output)))
	}

	frameFile, err := os.Open(framePath)
	if err != nil {
		return nil, err
	}
	// Deferred after os.Remove, so the file is closed before it is removed.
	defer frameFile.Close()

	frame, _, err := image.Decode(frameFile)
	if err != nil {
		return nil, err
	}
	return frame, nil
}
// recordAnalyzeVideo is the HTTP handler that analyzes a recorded video file.
//
// It accepts a POST request with an analyzeVideoReq JSON body, normalizes and
// validates mode ("sprite" default, or "video") and goal ("highlights"
// default, or "nsfw"), checks that the output file exists, and runs the
// matching analysis with a 45 second budget. On success the hits and the
// segments derived from them are persisted via writeVideoAIForFile
// (best effort, a failure is only logged) and returned as analyzeVideoResp.
//
// Invalid requests are answered with a plain-text HTTP error (400/404); an
// analysis failure is reported as a JSON response with ok=false.
func recordAnalyzeVideo(w http.ResponseWriter, r *http.Request) {
	if !mustMethod(w, r, http.MethodPost) {
		return
	}

	// The request is a tiny JSON object; cap the body so an oversized payload
	// cannot be pulled into memory by the decoder.
	const maxBodyBytes = 1 << 20
	r.Body = http.MaxBytesReader(w, r.Body, maxBodyBytes)

	var req analyzeVideoReq
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "ungültiger body: "+err.Error(), http.StatusBadRequest)
		return
	}

	req.Mode = strings.ToLower(strings.TrimSpace(req.Mode))
	req.Goal = strings.ToLower(strings.TrimSpace(req.Goal))
	if req.Mode == "" {
		req.Mode = "sprite"
	}
	if req.Goal == "" {
		req.Goal = "highlights"
	}

	switch req.Mode {
	case "sprite", "video":
	default:
		http.Error(w, "mode muss 'sprite' oder 'video' sein", http.StatusBadRequest)
		return
	}
	switch req.Goal {
	case "highlights", "nsfw":
	default:
		http.Error(w, "goal muss 'highlights' oder 'nsfw' sein", http.StatusBadRequest)
		return
	}

	// NOTE(review): Output is a client-supplied file system path that is
	// passed to os.Stat and ffmpeg without being restricted to a known
	// directory. Confirm that this endpoint is only reachable by trusted
	// clients, or validate the path against the recordings directory.
	outPath := strings.TrimSpace(req.Output)
	if outPath == "" {
		http.Error(w, "output fehlt", http.StatusBadRequest)
		return
	}
	fi, err := os.Stat(outPath)
	if err != nil || fi == nil || fi.IsDir() || fi.Size() <= 0 {
		http.Error(w, "output datei nicht gefunden", http.StatusNotFound)
		return
	}

	ctx, cancel := context.WithTimeout(r.Context(), 45*time.Second)
	defer cancel()

	var hits []analyzeHit
	switch req.Mode {
	case "sprite":
		hits, err = analyzeVideoFromSprite(ctx, outPath, req.Goal)
	case "video":
		hits, err = analyzeVideoFromFrames(ctx, outPath, req.Goal)
	}
	if err != nil {
		respondJSON(w, analyzeVideoResp{
			OK:    false,
			Mode:  req.Mode,
			Goal:  req.Goal,
			Hits:  []analyzeHit{},
			Error: err.Error(),
		})
		return
	}

	// A missing duration is tolerated here: it only results in no segments.
	durationSec, _ := durationSecondsForAnalyze(ctx, outPath)
	segments := buildSegmentsFromAnalyzeHits(hits, durationSec)

	ai := &aiAnalysisMeta{
		Goal:           req.Goal,
		Mode:           req.Mode,
		Hits:           hits,
		Segments:       segments,
		AnalyzedAtUnix: time.Now().Unix(),
	}
	// Persisting is best effort; the client still receives the result.
	if err := writeVideoAIForFile(ctx, outPath, "", ai); err != nil {
		fmt.Println("⚠️ writeVideoAIForFile:", err)
	}

	respondJSON(w, analyzeVideoResp{
		OK:       true,
		Mode:     req.Mode,
		Goal:     req.Goal,
		Hits:     hits,
		Segments: segments,
	})
}
// analyzeVideoFromSprite analyzes a video through its preview sprite instead
// of the video file itself.
//
// The video id is derived from outPath, the generated meta file is looked up
// for the sprite layout, and "preview-sprite.jpg" is expected next to that
// meta file. Every sprite frame gets a timestamp, the frames are classified,
// and the per-frame hits are merged before being returned.
//
// An error is returned when the id, the meta file, the sprite metadata, the
// sprite image or the candidates are missing, or when the frame analysis
// fails.
func analyzeVideoFromSprite(ctx context.Context, outPath, goal string) ([]analyzeHit, error) {
	videoID := strings.TrimSpace(videoIDFromOutputPath(outPath))
	if videoID == "" {
		return nil, fmt.Errorf("konnte keine video-id aus output ableiten")
	}

	metaPath, metaErr := generatedMetaFile(videoID)
	if metaErr != nil || strings.TrimSpace(metaPath) == "" {
		return nil, fmt.Errorf("meta.json nicht gefunden")
	}

	spriteMeta, found := readPreviewSpriteMetaFromMetaFile(metaPath)
	switch {
	case !found:
		return nil, fmt.Errorf("previewSprite meta fehlt")
	case spriteMeta.Count <= 0:
		return nil, fmt.Errorf("previewSprite count fehlt")
	}

	spritePath := filepath.Join(filepath.Dir(metaPath), "preview-sprite.jpg")
	info, statErr := os.Stat(spritePath)
	if statErr != nil || info == nil || info.IsDir() || info.Size() <= 0 {
		return nil, fmt.Errorf("preview-sprite.jpg nicht gefunden")
	}

	// The duration is optional here: without it the candidate timestamps fall
	// back to the step size stored in the sprite metadata.
	durationSec, _ := durationSecondsForAnalyze(ctx, outPath)
	candidates := buildSpriteFrameCandidates(spriteMeta.Count, spriteMeta.StepSeconds, durationSec)
	if len(candidates) == 0 {
		return nil, fmt.Errorf("keine sprite-kandidaten vorhanden")
	}

	// This is the hook for the AI/vision analysis: the sprite frames are
	// classified one by one and every frame above its threshold yields a hit.
	// The hits are grouped into ranges afterwards.
	perFrame, aiErr := analyzeSpriteCandidatesWithAI(ctx, spritePath, spriteMeta, candidates, goal)
	if aiErr != nil {
		return nil, aiErr
	}
	return mergeAnalyzeHits(perFrame), nil
}
// nsfwThresholdForLabel returns the minimum score a detection with the given
// label must reach to count as a hit. The label is compared after trimming
// whitespace and lower-casing. Explicit "exposed" labels use the strong
// threshold, the listed covered/less explicit labels use the moderate one,
// and every other label uses 0.50.
func nsfwThresholdForLabel(label string) float64 {
	switch strings.ToLower(strings.TrimSpace(label)) {
	case "anus_exposed", "female_genitalia_exposed", "male_genitalia_exposed",
		"female_breast_exposed", "buttocks_exposed":
		return nsfwThresholdStrong

	case "female_breast_covered", "male_breast_exposed", "male_breast_covered",
		"buttocks_covered", "female_genitalia_covered", "male_genitalia_covered",
		"belly_exposed", "armpits_exposed", "feet_exposed", "feet_covered":
		return nsfwThresholdModerate
	}
	return 0.50
}
// analyzeVideoFromFrames analyzes the video file itself by extracting and
// classifying 24 frames spread over its duration.
//
// For any goal other than "nsfw" an empty hit list is returned. Every frame
// whose best detection reaches its label threshold yields a hit with a window
// of 4 seconds before and after the frame, clamped to [0, duration]. The hits
// are merged before being returned.
//
// Errors: the duration cannot be determined, no sample times can be built, a
// frame cannot be extracted, the context is cancelled, or the classifier
// failed for every single frame (individual classifier failures are skipped).
func analyzeVideoFromFrames(ctx context.Context, outPath, goal string) ([]analyzeHit, error) {
	if goal != "nsfw" {
		return []analyzeHit{}, nil
	}

	durationSec, durErr := durationSecondsForAnalyze(ctx, outPath)
	if durationSec <= 0 {
		if durErr != nil {
			return nil, fmt.Errorf("videolänge konnte nicht bestimmt werden: %w", durErr)
		}
		return nil, fmt.Errorf("videolänge konnte nicht bestimmt werden")
	}

	sampleTimes := buildVideoSampleTimes(durationSec, 24)
	if len(sampleTimes) == 0 {
		return nil, fmt.Errorf("keine frame-samples berechnet")
	}

	hits := make([]analyzeHit, 0, len(sampleTimes))
	classified := 0
	var lastClassifyErr error

	for _, t := range sampleTimes {
		if err := ctx.Err(); err != nil {
			return nil, fmt.Errorf("analyse abgebrochen: %w", err)
		}

		img, err := extractVideoFrameAt(ctx, outPath, t)
		if err != nil {
			return nil, fmt.Errorf("frame extraktion bei %.3fs fehlgeschlagen: %w", t, err)
		}

		res, err := classifyFrameNSFW(ctx, img)
		if err != nil {
			// A single unclassifiable frame is tolerated; remember the error
			// so that a classifier that never works is still reported.
			lastClassifyErr = err
			continue
		}
		classified++

		// Only detections that reach their own threshold may compete.
		// Otherwise a high-priority label with a negligible score would win
		// the selection and the frame would be dropped although another label
		// was detected with confidence.
		confident := make([]NsfwFrameResult, 0, len(res.Results))
		for _, r := range res.Results {
			if r.Score >= nsfwThresholdForLabel(r.Label) {
				confident = append(confident, r)
			}
		}

		bestLabel, bestScore := pickBestNSFWResult(confident)
		if bestLabel == "" {
			continue
		}

		hits = append(hits, analyzeHit{
			Time:  t,
			Label: bestLabel,
			Score: bestScore,
			Start: math.Max(0, t-4),
			End:   math.Min(durationSec, t+4),
		})
	}

	if classified == 0 && lastClassifyErr != nil {
		return nil, fmt.Errorf("nsfw-klassifizierung fehlgeschlagen: %w", lastClassifyErr)
	}
	return mergeAnalyzeHits(hits), nil
}
// analyzeSpriteCandidatesWithAI classifies the sprite frames referenced by
// candidates and returns one hit per frame whose best detection reaches its
// label threshold. The hits are returned unmerged.
//
// For any goal other than "nsfw" an empty hit list is returned. Candidates
// whose index lies outside the extracted frames are skipped. The window of a
// hit is centered on the candidate time; its width is derived from
// ps.StepSeconds (fallback 8 seconds) and its start is clamped to 0.
//
// Errors: the sprite frames cannot be extracted, the context is cancelled, or
// the classifier failed for every frame it was given (individual classifier
// failures are skipped).
func analyzeSpriteCandidatesWithAI(
	ctx context.Context,
	spritePath string,
	ps previewSpriteMetaFileInfo,
	candidates []spriteFrameCandidate,
	goal string,
) ([]analyzeHit, error) {
	if goal != "nsfw" {
		return []analyzeHit{}, nil
	}

	frames, err := extractSpriteFrames(spritePath, ps)
	if err != nil {
		return nil, fmt.Errorf("sprite frames extrahieren fehlgeschlagen: %w", err)
	}

	// The window width does not depend on the individual candidate.
	halfSpan := inferredSpanSeconds(ps.StepSeconds, 8) / 2

	hits := make([]analyzeHit, 0, len(candidates))
	classified := 0
	var lastClassifyErr error

	for _, c := range candidates {
		// classifyFrameNSFW does not observe ctx, so cancellation and the
		// request timeout have to be checked here; otherwise the remaining
		// frames would still be classified after the deadline.
		if err := ctx.Err(); err != nil {
			return nil, fmt.Errorf("sprite-analyse abgebrochen: %w", err)
		}
		if c.Index < 0 || c.Index >= len(frames) {
			continue
		}

		res, err := classifyFrameNSFW(ctx, frames[c.Index])
		if err != nil {
			// A single unclassifiable frame is tolerated; remember the error
			// so that a classifier that never works is still reported.
			lastClassifyErr = err
			continue
		}
		classified++

		// Only detections that reach their own threshold may compete.
		// Otherwise a high-priority label with a negligible score would win
		// the selection and the frame would be dropped although another label
		// was detected with confidence.
		confident := make([]NsfwFrameResult, 0, len(res.Results))
		for _, r := range res.Results {
			if r.Score >= nsfwThresholdForLabel(r.Label) {
				confident = append(confident, r)
			}
		}

		bestLabel, bestScore := pickBestNSFWResult(confident)
		if bestLabel == "" {
			continue
		}

		hits = append(hits, analyzeHit{
			Time:  c.Time,
			Label: bestLabel,
			Score: bestScore,
			Start: math.Max(0, c.Time-halfSpan),
			End:   c.Time + halfSpan,
		})
	}

	if classified == 0 && lastClassifyErr != nil {
		return nil, fmt.Errorf("nsfw-klassifizierung fehlgeschlagen: %w", lastClassifyErr)
	}
	return hits, nil
}
// mergeAnalyzeHits normalizes hits and merges neighbouring hits that carry
// the same label.
//
// Hits with an empty or ignored label are dropped and labels are trimmed and
// lower-cased. A hit without a window end (End <= 0) gets End = Time, and
// Start = Time as well when Start is not positive. When a window end is
// present, a Start of 0 is a valid window start (windows near the beginning
// of the video are clamped to 0) and is kept; a negative Start is raised
// to 0.
//
// The hits are sorted by start, end and label. A hit is merged into its
// direct predecessor when both have the same label and the hit starts no
// later than one second after the predecessor ends. The merged hit spans both
// windows, keeps the higher score and gets the window midpoint as Time.
//
// The result is never nil, so it serializes as a JSON array.
func mergeAnalyzeHits(in []analyzeHit) []analyzeHit {
	if len(in) == 0 {
		return []analyzeHit{}
	}

	cp := make([]analyzeHit, 0, len(in))
	for _, h := range in {
		label := strings.ToLower(strings.TrimSpace(h.Label))
		if label == "" || isIgnoredNSFWLabel(label) {
			continue
		}

		start, end := h.Start, h.End
		if end <= 0 {
			// No usable window end: fall back to the hit time.
			if start <= 0 {
				start = h.Time
			}
			end = h.Time
		} else if start < 0 {
			// A window end exists, so Start == 0 means "from the beginning"
			// and must not be replaced by the hit time.
			start = 0
		}

		h.Label, h.Start, h.End = label, start, end
		cp = append(cp, h)
	}
	if len(cp) == 0 {
		return []analyzeHit{}
	}

	sort.Slice(cp, func(i, j int) bool {
		if cp[i].Start != cp[j].Start {
			return cp[i].Start < cp[j].Start
		}
		if cp[i].End != cp[j].End {
			return cp[i].End < cp[j].End
		}
		return cp[i].Label < cp[j].Label
	})

	// Largest gap between two windows that are still merged.
	const mergeGapSeconds = 1.0

	out := make([]analyzeHit, 0, len(cp))
	cur := cp[0]
	for _, n := range cp[1:] {
		// Only directly consecutive hits with the same label are combined.
		sameLabel := strings.EqualFold(cur.Label, n.Label)
		touchesOrNear := n.Start <= cur.End+mergeGapSeconds
		if !sameLabel || !touchesOrNear {
			out = append(out, cur)
			cur = n
			continue
		}

		if n.Start < cur.Start {
			cur.Start = n.Start
		}
		if n.End > cur.End {
			cur.End = n.End
		}
		if n.Score > cur.Score {
			cur.Score = n.Score
		}
		cur.Time = (cur.Start + cur.End) / 2
	}
	out = append(out, cur)
	return out
}
// buildSegmentsFromAnalyzeHits converts hits into auto-selected segments.
//
// Only hits whose label passes shouldAutoSelectAnalyzeHit are used. A hit
// without a window end (End <= 0) gets End = Time, and Start = Time as well
// when Start is not positive. When a window end is present, a Start of 0 is a
// valid window start and is kept; a negative Start is raised to 0. Windows
// are put in order, clamped to [0, duration] and dropped when they end up
// empty.
//
// The segments are sorted by start, end and label. Directly consecutive
// segments with the same label are merged regardless of the gap between them;
// the merged segment spans both and keeps the higher score.
//
// The result is never nil. It is empty when there are no hits or when
// duration is not positive.
func buildSegmentsFromAnalyzeHits(hits []analyzeHit, duration float64) []aiSegmentMeta {
	if len(hits) == 0 || duration <= 0 {
		return []aiSegmentMeta{}
	}

	out := make([]aiSegmentMeta, 0, len(hits))
	for _, hit := range hits {
		if !shouldAutoSelectAnalyzeHit(hit.Label) {
			continue
		}

		start, end := hit.Start, hit.End
		if end <= 0 {
			// No usable window end: fall back to the hit time.
			if start <= 0 {
				start = hit.Time
			}
			end = hit.Time
		} else if start < 0 {
			// A window end exists, so Start == 0 means "from the beginning"
			// and must not be replaced by the hit time.
			start = 0
		}

		if start > end {
			start, end = end, start
		}
		start = math.Max(0, math.Min(start, duration))
		end = math.Max(0, math.Min(end, duration))
		if end <= start {
			continue
		}

		out = append(out, aiSegmentMeta{
			Label:           strings.ToLower(strings.TrimSpace(hit.Label)),
			StartSeconds:    start,
			EndSeconds:      end,
			DurationSeconds: end - start,
			Score:           hit.Score,
			AutoSelected:    true,
		})
	}
	if len(out) == 0 {
		return []aiSegmentMeta{}
	}

	sort.Slice(out, func(i, j int) bool {
		if out[i].StartSeconds != out[j].StartSeconds {
			return out[i].StartSeconds < out[j].StartSeconds
		}
		if out[i].EndSeconds != out[j].EndSeconds {
			return out[i].EndSeconds < out[j].EndSeconds
		}
		return out[i].Label < out[j].Label
	})

	merged := make([]aiSegmentMeta, 0, len(out))
	cur := out[0]
	for _, n := range out[1:] {
		// Directly consecutive segments with the same label are always
		// merged, no matter how large the gap is. A different label in
		// between prevents the merge automatically, because only the direct
		// successor is considered.
		if !strings.EqualFold(cur.Label, n.Label) {
			merged = append(merged, cur)
			cur = n
			continue
		}

		if n.StartSeconds < cur.StartSeconds {
			cur.StartSeconds = n.StartSeconds
		}
		if n.EndSeconds > cur.EndSeconds {
			cur.EndSeconds = n.EndSeconds
		}
		cur.DurationSeconds = cur.EndSeconds - cur.StartSeconds
		if n.Score > cur.Score {
			cur.Score = n.Score
		}
		cur.AutoSelected = cur.AutoSelected || n.AutoSelected
	}
	merged = append(merged, cur)
	return merged
}
// buildSpriteFrameCandidates assigns a timestamp to each of the count sprite
// frames and returns them in index order; nil when count is not positive.
//
// The timestamp rule is chosen once, in this order:
//  1. i*stepSeconds, when step and duration are both known and the step
//     roughly matches the duration (total coverage within 70%..130%);
//  2. an even spread over [0, durationSec], when the duration is known and
//     there is more than one frame;
//  3. i*stepSeconds, when only the step is known;
//  4. the frame index itself.
func buildSpriteFrameCandidates(count int, stepSeconds, durationSec float64) []spriteFrameCandidate {
	if count <= 0 {
		return nil
	}

	trustStep := false
	if stepSeconds > 0 && durationSec > 0 {
		covered := stepSeconds * math.Max(1, float64(count-1))
		trustStep = covered >= durationSec*0.7 && covered <= durationSec*1.3
	}

	byStep := func(i int) float64 { return float64(i) * stepSeconds }

	var timeOf func(i int) float64
	switch {
	case trustStep:
		timeOf = byStep
	case durationSec > 0 && count > 1:
		timeOf = func(i int) float64 { return (float64(i) / float64(count-1)) * durationSec }
	case stepSeconds > 0:
		timeOf = byStep
	default:
		timeOf = func(i int) float64 { return float64(i) }
	}

	candidates := make([]spriteFrameCandidate, count)
	for i := range candidates {
		candidates[i] = spriteFrameCandidate{Index: i, Time: timeOf(i)}
	}
	return candidates
}
// buildVideoSampleTimes returns sampleCount timestamps (seconds, ascending)
// at which frames of a video of length durationSec should be sampled. It
// returns nil when durationSec is not a positive number or sampleCount is not
// positive.
//
// The video is divided into sampleCount slices of equal length and the
// midpoint of each slice is sampled. Every timestamp therefore lies strictly
// inside (0, durationSec). In particular no sample is placed on the very end
// of the video, where there is usually no frame left to extract and the frame
// grab would fail.
func buildVideoSampleTimes(durationSec float64, sampleCount int) []float64 {
	// The negated comparison also rejects NaN.
	if !(durationSec > 0) || sampleCount <= 0 {
		return nil
	}

	out := make([]float64, 0, sampleCount)
	for i := 0; i < sampleCount; i++ {
		ratio := (float64(i) + 0.5) / float64(sampleCount)
		out = append(out, ratio*durationSec)
	}
	return out
}
// inferredSpanSeconds returns the width (seconds) of the window a sprite hit
// covers: one and a half sprite steps, but at least 2 seconds. When the step
// is not positive, fallback is returned unchanged.
func inferredSpanSeconds(stepSeconds float64, fallback float64) float64 {
	span := fallback
	if stepSeconds > 0 {
		span = math.Max(2, stepSeconds*1.5)
	}
	return span
}
// durationSecondsForAnalyze returns the result of durationSecondsCached for
// outPath, giving the lookup at most 8 seconds on top of whatever deadline
// ctx already carries.
func durationSecondsForAnalyze(ctx context.Context, outPath string) (float64, error) {
	const probeTimeout = 8 * time.Second

	probeCtx, release := context.WithTimeout(ctx, probeTimeout)
	defer release()

	return durationSecondsCached(probeCtx, outPath)
}
// videoIDFromOutputPath derives the video id from an output file path: the
// base name without its extension, passed through stripHotPrefix and trimmed.
func videoIDFromOutputPath(outPath string) string {
	name := filepath.Base(strings.TrimSpace(outPath))
	if len(name) == 0 {
		return ""
	}

	ext := filepath.Ext(name)
	withoutExt := strings.TrimSuffix(name, ext)
	return strings.TrimSpace(stripHotPrefix(withoutExt))
}