feat: offline OCR (Tesseract) + embedding category classifier (@xenova/transformers)
Tesseract OCR (PHP, server-side):
- Dockerfile: adds tesseract-ocr + tesseract-ocr-ita + libgd-dev (gd extension)
- api/index.php: new tesseractReadExpiry() — decodes base64 image, pre-processes with GD (2× upscale, greyscale, auto-contrast, sharpen), runs tesseract CLI with ita+eng PSM-6, extracts date with multi-pattern regex (DD/MM/YYYY, MM/YYYY, ISO, named-month), returns YYYY-MM-DD + confidence
- geminiReadExpiry() now: (1) tries Tesseract first; (2) falls back to Gemini Vision if OCR returns null or no date found; (3) passes source ('ocr'|'gemini') in response
@xenova/transformers embedding classifier (browser-side):
- index.html: ES-module bootstrap that lazy-loads 'Xenova/all-MiniLM-L6-v2' quantized (~23 MB, cached in browser) via window._getCategoryPipeline(); pre-warms on first scan page visit
- assets/js/app.js: classifyCategoryByEmbedding(name) — embeds product name + 16 category anchor descriptions, cosine similarity, threshold 0.30; results cached in _embeddingCache Map
- autoDetectCategory(): after keyword map misses, fires classifyCategoryByEmbedding async and updates select when resolved (respects manuallySet flag)
- createQuickProduct(): if regex returned 'altro', silently patches category with embedding result via a background api call
This commit is contained in:
+6
-2
@@ -1,11 +1,15 @@
|
|||||||
FROM php:8.2-apache
|
FROM php:8.2-apache
|
||||||
|
|
||||||
# Install required PHP extensions
|
# Install required PHP extensions + Tesseract OCR for offline expiry date reading
|
||||||
RUN apt-get update && apt-get install -y \
|
RUN apt-get update && apt-get install -y \
|
||||||
libsqlite3-dev \
|
libsqlite3-dev \
|
||||||
libcurl4-openssl-dev \
|
libcurl4-openssl-dev \
|
||||||
libonig-dev \
|
libonig-dev \
|
||||||
&& docker-php-ext-install pdo_sqlite curl mbstring \
|
libgd-dev \
|
||||||
|
tesseract-ocr \
|
||||||
|
tesseract-ocr-ita \
|
||||||
|
tesseract-ocr-eng \
|
||||||
|
# GD must be configured with JPEG support before it is built; otherwise
# imagecreatefromstring() rejects JPEG photos and the OCR step is skipped.
&& docker-php-ext-configure gd --with-jpeg \
&& docker-php-ext-install pdo_sqlite curl mbstring gd \
|
||||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Enable Apache mod_rewrite and mod_headers
|
# Enable Apache mod_rewrite and mod_headers
|
||||||
|
|||||||
+179
-6
@@ -2243,13 +2243,162 @@ function getOpenedShelfLifeAction(): void {
|
|||||||
echo json_encode(['days' => $days]);
|
echo json_encode(['days' => $days]);
|
||||||
}
|
}
|
||||||
|
|
||||||
function geminiReadExpiry(): void {
|
// ===== TESSERACT OFFLINE OCR HELPER =====
|
||||||
$apiKey = env('GEMINI_API_KEY');
|
|
||||||
if (empty($apiKey)) {
|
/**
|
||||||
echo json_encode(['success' => false, 'error' => 'no_api_key']);
|
* Try to extract an expiry date from a base64 image using Tesseract OCR (offline).
|
||||||
return;
|
* Returns ['found'=>true,'date'=>'YYYY-MM-DD','raw_text'=>'...','confidence'=>float]
|
||||||
|
* or ['found'=>false,'raw_text'=>'...']
|
||||||
|
*
|
||||||
|
* Strategy:
|
||||||
|
* 1. Decode base64 → temp JPEG
|
||||||
|
* 2. Pre-process with GD: 2× upscale, greyscale, contrast boost, sharpen
|
||||||
|
* 3. Run tesseract with Italian+English langs, PSM-6 (block of text)
|
||||||
|
* 4. Run date-format regexes (Italian & international patterns)
|
||||||
|
* 5. Normalise to YYYY-MM-DD
|
||||||
|
*
|
||||||
|
* Returns null if the tesseract binary or the GD extension is not available, or if the image cannot be decoded.
|
||||||
|
*/
|
||||||
|
function tesseractReadExpiry(string $imageBase64): ?array {
    // Contract:
    //   null                                   -> OCR could not run at all (GD, shell_exec or the
    //                                             tesseract binary missing, image undecodable,
    //                                             temp file not writable)
    //   ['found' => false, 'raw_text' => ...]  -> OCR ran but no plausible date was read
    //   ['found' => true, 'date' => 'YYYY-MM-DD', 'raw_text', 'raw_match',
    //    'confidence', 'source' => 'tesseract'] -> a date was extracted

    // Require GD, a usable shell_exec() and the tesseract binary.
    if (!function_exists('imagecreatefromstring') || !function_exists('shell_exec')) return null;
    $tesseract = trim((string)shell_exec('command -v tesseract 2>/dev/null'));
    if ($tesseract === '') return null;

    // ── 1. Decode image ────────────────────────────────────────────────────
    // Tolerate a "data:image/...;base64," prefix in front of the payload.
    $payload = preg_replace('#^\s*data:[^,]*;base64,#i', '', $imageBase64) ?? $imageBase64;
    $imgData = base64_decode($payload);
    if ($imgData === false || strlen($imgData) < 100) return null;

    $src = @imagecreatefromstring($imgData);
    if (!$src) return null;

    $w = imagesx($src);
    $h = imagesy($src);

    // ── 2. Pre-process ─────────────────────────────────────────────────────
    // 2a. Upscale ×2 – low-res packaging photos OCR better when doubled.
    //     Large photos are left at their native size: doubling them would
    //     only risk exhausting PHP's memory limit (a fatal error that would
    //     also prevent the caller from falling back to another reader).
    $maxSide = 4000;
    $scale   = (max($w, $h) * 2 <= $maxSide) ? 2 : 1;
    $dst     = imagecreatetruecolor($w * $scale, $h * $scale);
    if (!$dst) {
        imagedestroy($src);
        return null;
    }
    imagecopyresampled($dst, $src, 0, 0, 0, 0, $w * $scale, $h * $scale, $w, $h);
    imagedestroy($src);

    // 2b. Greyscale + contrast boost
    imagefilter($dst, IMG_FILTER_GRAYSCALE);
    imagefilter($dst, IMG_FILTER_CONTRAST, -40); // negative = increase contrast in GD

    // 2c. Sharpen (convolution kernel)
    imageconvolution($dst, [[0, -1, 0], [-1, 5, -1], [0, -1, 0]], 1, 0);

    // ── 3. Write temp file & run Tesseract ────────────────────────────────
    // tempnam() creates the file atomically with a unique name; the OCR text
    // is read from stdout so no second temp file is needed.
    $tmpIn = tempnam(sys_get_temp_dir(), 'ocr_in_');
    if ($tmpIn === false) {
        imagedestroy($dst);
        return null;
    }

    try {
        $written = imagepng($dst, $tmpIn);
        imagedestroy($dst);
        if (!$written) return null;

        // PSM 6 = assume a single uniform block of text (good for cropped label areas)
        $cmd = escapeshellarg($tesseract)
            . ' ' . escapeshellarg($tmpIn)
            . ' stdout'
            . ' -l ita+eng --psm 6 --oem 1'
            . ' quiet 2>/dev/null';
        $rawText = trim((string)shell_exec($cmd));
    } finally {
        // Always remove the temp image, including on the early-return paths above.
        if (file_exists($tmpIn)) unlink($tmpIn);
    }

    if ($rawText === '') return ['found' => false, 'raw_text' => ''];

    // ── 4/5. Parse date patterns and normalise to YYYY-MM-DD ──────────────
    $best = tesseractPickExpiryDate($rawText);
    if ($best === null) {
        return ['found' => false, 'raw_text' => $rawText];
    }

    return [
        'found'      => true,
        'date'       => $best['date'],
        'raw_text'   => $rawText,
        'raw_match'  => $best['raw'],
        // A single distinct date in the text is less ambiguous than several.
        'confidence' => $best['count'] === 1 ? 0.9 : 0.75,
        'source'     => 'tesseract',
    ];
}

/**
 * Pick the most plausible expiry date out of raw OCR text.
 *
 * Patterns are tried from most to least specific and every matched span is
 * blanked out afterwards, so "15/04/2026" cannot also be counted as the
 * month/year date "04/2026" (and "15 APR 2026" not also as "APR 2026").
 * Only real calendar dates with a year in 2020..2040 are accepted.
 * Dates that are today or later win over past dates; within each group the
 * date closest to today wins.
 *
 * @param string $rawText Text as returned by the OCR engine.
 * @return array|null ['date' => 'YYYY-MM-DD', 'raw' => matched text,
 *                     'count' => number of distinct valid dates found],
 *                    or null when no valid date was found.
 */
function tesseractPickExpiryDate(string $rawText): ?array {
    // Normalise confusable OCR chars: a stand-alone "O" → 0, "I"/"l" directly before a digit → 1
    $work = preg_replace('/\bO\b/', '0', $rawText) ?? $rawText;
    $work = preg_replace('/[Il](?=\d)/', '1', $work) ?? $work;

    $monthMap = [
        'gen' => 1, 'jan' => 1, 'feb' => 2, 'mar' => 3, 'apr' => 4, 'mag' => 5, 'may' => 5,
        'giu' => 6, 'jun' => 6, 'lug' => 7, 'jul' => 7, 'ago' => 8, 'aug' => 8,
        'set' => 9, 'sep' => 9, 'ott' => 10, 'oct' => 10, 'nov' => 11, 'dic' => 12, 'dec' => 12,
    ];
    $monthAlt = implode('|', array_keys($monthMap));

    // Each rule: [regex, converter from capture groups to [year, month, day]].
    $rules = [
        // DD/MM/YYYY or DD-MM-YYYY or DD.MM.YYYY
        ['/\b(\d{1,2})[\/\-\.](\d{1,2})[\/\-\.](\d{4})\b/',
            static fn(array $g): array => [(int)$g[3], (int)$g[2], (int)$g[1]]],
        // YYYY-MM-DD (ISO)
        ['/\b(\d{4})-(\d{2})-(\d{2})\b/',
            static fn(array $g): array => [(int)$g[1], (int)$g[2], (int)$g[3]]],
        // DD MMM YYYY (e.g. 15 APR 2026)
        ['/\b(\d{1,2})\s+(' . $monthAlt . ')\.?\s*(\d{4})\b/i',
            static fn(array $g): array => [(int)$g[3], $monthMap[strtolower($g[2])], (int)$g[1]]],
        // MM/YYYY or MM-YYYY (best-before month/year only → first day of the month)
        ['/\b(\d{1,2})[\/\-\.](\d{4})\b/',
            static fn(array $g): array => [(int)$g[2], (int)$g[1], 1]],
        // MMM YYYY (e.g. APR 2026)
        ['/\b(' . $monthAlt . ')\.?\s*(\d{4})\b/i',
            static fn(array $g): array => [(int)$g[2], $monthMap[strtolower($g[1])], 1]],
    ];

    $today      = new DateTimeImmutable('today');
    $candidates = []; // keyed by date string so a date printed twice counts once

    foreach ($rules as [$pattern, $toYmd]) {
        if (!preg_match_all($pattern, $work, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) continue;

        foreach ($matches as $match) {
            [$text, $offset] = $match[0];
            // Same-length blanking keeps the remaining offsets of this pass valid.
            $work = substr_replace($work, str_repeat(' ', strlen($text)), $offset, strlen($text));

            [$y, $mo, $d] = $toYmd(array_column($match, 0));
            if ($y < 2020 || $y > 2040) continue;
            if (!checkdate($mo, $d, $y)) continue; // rejects 31/02, month 13, day 0, ...

            $dateStr = sprintf('%04d-%02d-%02d', $y, $mo, $d);
            if (isset($candidates[$dateStr])) continue;

            // Signed whole days from today (negative = already past).
            $days = (int)$today->diff(new DateTimeImmutable($dateStr))->format('%r%a');
            $candidates[$dateStr] = ['date' => $dateStr, 'score' => $days, 'raw' => $text];
        }
    }

    if ($candidates === []) return null;

    $ranked = array_values($candidates);
    usort($ranked, static function (array $a, array $b): int {
        $aFuture = $a['score'] >= 0;
        $bFuture = $b['score'] >= 0;
        if ($aFuture !== $bFuture) return $aFuture ? -1 : 1; // future dates first
        return abs($a['score']) <=> abs($b['score']);        // then nearest to today
    });

    return ['date' => $ranked[0]['date'], 'raw' => $ranked[0]['raw'], 'count' => count($ranked)];
}
|
||||||
|
|
||||||
|
function geminiReadExpiry(): void {
|
||||||
$input = json_decode(file_get_contents('php://input'), true);
|
$input = json_decode(file_get_contents('php://input'), true);
|
||||||
$imageBase64 = $input['image'] ?? '';
|
$imageBase64 = $input['image'] ?? '';
|
||||||
|
|
||||||
@@ -2258,6 +2407,30 @@ function geminiReadExpiry(): void {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Step 1: Try Tesseract offline OCR first ────────────────────────────
|
||||||
|
$ocrResult = tesseractReadExpiry($imageBase64);
|
||||||
|
if ($ocrResult !== null && !empty($ocrResult['found']) && !empty($ocrResult['date'])) {
|
||||||
|
echo json_encode([
|
||||||
|
'success' => true,
|
||||||
|
'expiry_date' => $ocrResult['date'],
|
||||||
|
'raw_text' => $ocrResult['raw_text'] ?? '',
|
||||||
|
'source' => 'ocr',
|
||||||
|
]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Step 2: Fall back to Gemini Vision ────────────────────────────────
|
||||||
|
$apiKey = env('GEMINI_API_KEY');
|
||||||
|
if (empty($apiKey)) {
|
||||||
|
// No Gemini key and OCR failed/unavailable
|
||||||
|
echo json_encode([
|
||||||
|
'success' => false,
|
||||||
|
'error' => 'no_api_key',
|
||||||
|
'raw_text' => $ocrResult['raw_text'] ?? '',
|
||||||
|
]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Call Gemini API
|
// Call Gemini API
|
||||||
$payload = [
|
$payload = [
|
||||||
'contents' => [
|
'contents' => [
|
||||||
@@ -2305,7 +2478,7 @@ function geminiReadExpiry(): void {
|
|||||||
// Validate date format
|
// Validate date format
|
||||||
$date = $parsed['date'];
|
$date = $parsed['date'];
|
||||||
if (preg_match('/^\d{4}-\d{2}-\d{2}$/', $date)) {
|
if (preg_match('/^\d{4}-\d{2}-\d{2}$/', $date)) {
|
||||||
echo json_encode(['success' => true, 'expiry_date' => $date, 'raw_text' => $parsed['raw_text'] ?? '']);
|
echo json_encode(['success' => true, 'expiry_date' => $date, 'raw_text' => $parsed['raw_text'] ?? '', 'source' => 'gemini']);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+142
-2
@@ -1086,6 +1086,106 @@ function guessCategoryFromName(name) {
|
|||||||
return 'altro';
|
return 'altro';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// Embedding-based category classifier (async, @xenova/transformers)
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Canonical descriptions for each local category (used as embedding anchors).
|
||||||
|
// Anchor text per local category key: a short bag of Italian/English words that
// is embedded and compared against the product name. Entry order is the order
// in which categories are scored.
const _CATEGORY_DESCRIPTIONS = Object.fromEntries([
    ['latticini',  'latte yogurt formaggio burro panna mozzarella latticini dairy'],
    ['carne',      'carne pollo manzo maiale vitello prosciutto salame bresaola meat'],
    ['pesce',      'pesce tonno salmone merluzzo gamberi seafood fish'],
    ['frutta',     'frutta mela banana arancia pera fragola uva kiwi fruit'],
    ['verdura',    'verdura insalata zucchina carota cipolla spinaci tomato vegetables'],
    ['pasta',      'pasta spaghetti penne fusilli riso risotto noodles rice'],
    ['pane',       'pane fette biscottate grissini cracker toast bread bakery'],
    ['surgelati',  'surgelati congelato frozen gelato ice cream'],
    ['bevande',    'acqua birra vino succo caffè tè bevande drinks beverages'],
    ['condimenti', 'olio aceto sale zucchero farina ketchup maionese senape spezie condiments'],
    ['snack',      'biscotti cioccolato patatine snack caramelle wafer merendine'],
    ['conserve',   'conserve pelati passata marmellata miele legumi ceci beans canned'],
    ['cereali',    'cereali muesli granola fiocchi d\'avena oat breakfast cereal'],
    ['igiene',     'sapone shampoo dentifricio deodorante igiene personale hygiene'],
    ['pulizia',    'detersivo detergente pulizia casa sgrassatore cleaning'],
    ['altro',      'prodotto generico varie altro miscellaneous'],
]);
|
||||||
|
|
||||||
|
// In-memory cache: productName → category (avoids re-embedding the same product)
|
||||||
|
// Keys are the lower-cased, trimmed product name; values are the category key
// chosen by classifyCategoryByEmbedding, or null when no category was confident enough.
const _embeddingCache = new Map();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cosine similarity between two Float32Array vectors.
|
||||||
|
*/
|
||||||
|
function _cosineSim(a, b) {
    // Accumulate the dot product and both squared norms in a single pass over `a`.
    let dotProduct = 0;
    let sqNormA = 0;
    let sqNormB = 0;
    const len = a.length;
    for (let idx = 0; idx < len; idx++) {
        const x = a[idx];
        const y = b[idx];
        dotProduct += x * y;
        sqNormA += x * x;
        sqNormB += y * y;
    }
    // The small epsilon keeps the division defined for all-zero (or empty) vectors.
    const denominator = Math.sqrt(sqNormA) * Math.sqrt(sqNormB) + 1e-9;
    return dotProduct / denominator;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mean-pool a [1, tokens, dims] tensor → Float32Array of length dims.
|
||||||
|
*/
|
||||||
|
function _meanPool(tensor) {
    // Accepts any object exposing `dims` (shape array) and `data` (flat, row-major values).
    //   rank 0/1 -> the data is already a single vector; a Float32Array copy is returned
    //   rank 2   -> [tokens, dims]
    //   rank 3+  -> [..., tokens, dims]; only the first tokens×dims slice is pooled,
    //               which for the usual [1, tokens, dims] shape is the whole tensor
    // Returns a Float32Array of length `dims` holding the per-dimension mean over tokens.
    const shape = Array.from(tensor.dims ?? []);
    const data = tensor.data;

    if (shape.length <= 1) return Float32Array.from(data);

    const dims = shape[shape.length - 1];
    const tokens = shape[shape.length - 2];
    const out = new Float32Array(dims);
    if (!tokens) return out; // nothing to average: zero vector instead of NaN

    for (let t = 0; t < tokens; t++) {
        for (let d = 0; d < dims; d++) {
            out[d] += data[t * dims + d];
        }
    }
    for (let d = 0; d < dims; d++) out[d] /= tokens;
    return out;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Async: returns the best-matching category key for `productName`.
|
||||||
|
* Returns null if the model is unavailable or similarity is too low.
|
||||||
|
* THRESHOLD 0.30 — below this the regex fallback is more reliable.
|
||||||
|
*/
|
||||||
|
async function classifyCategoryByEmbedding(productName) {
    // Resolves to the category key whose anchor description is most similar to
    // `productName`, or to null when the name is empty, the model is unavailable,
    // an error occurs, the best similarity is below 0.30, or the best match is 'altro'.
    // Successful classifications (including a null "no confident match") are cached
    // per normalised name; failures are not cached so they can be retried.
    if (!productName) return null;
    const key = String(productName).toLowerCase().trim();
    if (!key) return null;
    if (_embeddingCache.has(key)) return _embeddingCache.get(key);

    if (typeof window._getCategoryPipeline !== 'function') return null;

    // Turn one row of the pipeline output into a plain Float32Array.
    // The pipeline is called with pooling:'mean', so a row is normally already a
    // rank-1 vector and must be used as is; running it through _meanPool (which
    // expects [1, tokens, dims]) would discard it. Token-level rows are still
    // supported by presenting them to _meanPool in that rank-3 layout.
    const toVector = (row) => {
        const shape = row && row.dims;
        if (!shape) return new Float32Array(row.data ?? row);
        if (shape.length <= 1) return Float32Array.from(row.data);
        const [tokens, dims] = shape.slice(-2);
        return _meanPool({ dims: [1, tokens, dims], data: row.data });
    };

    try {
        // Inside the try so that a rejected loader also degrades to null; callers
        // use this function fire-and-forget without their own catch.
        const pipe = await window._getCategoryPipeline();
        if (!pipe) return null;

        const labels = Object.keys(_CATEGORY_DESCRIPTIONS);
        const texts = [key, ...labels.map(l => _CATEGORY_DESCRIPTIONS[l])];

        // Embed all texts in one batched call for efficiency:
        // row 0 is the product name, rows 1..n are the category anchors.
        const output = await pipe(texts, { pooling: 'mean', normalize: true });
        const queryVec = toVector(output[0]);
        const vectors = labels.map((_, i) => toVector(output[i + 1]));

        let bestLabel = null, bestSim = 0;
        for (let i = 0; i < labels.length; i++) {
            const sim = _cosineSim(queryVec, vectors[i]);
            if (sim > bestSim) { bestSim = sim; bestLabel = labels[i]; }
        }

        // 'altro' is the catch-all bucket, so matching it carries no information.
        const result = (bestSim >= 0.30 && bestLabel !== 'altro') ? bestLabel : null;
        _embeddingCache.set(key, result);
        return result;
    } catch (e) {
        console.warn('[EverShelf] Embedding classify error:', e);
        return null;
    }
}
|
||||||
|
|
||||||
// Determine safety level for expired products
|
// Determine safety level for expired products
|
||||||
// Returns { level: 'danger'|'warning'|'ok', icon, label, tip }
|
// Returns { level: 'danger'|'warning'|'ok', icon, label, tip }
|
||||||
function getExpiredSafety(item, daysExpired) {
|
function getExpiredSafety(item, daysExpired) {
|
||||||
@@ -2024,7 +2124,12 @@ function showPage(pageId, param = null) {
|
|||||||
}
|
}
|
||||||
loadInventory();
|
loadInventory();
|
||||||
break;
|
break;
|
||||||
case 'scan': initScanner(); clearQuickNameResults(); updateSpesaBanner(); break;
|
case 'scan': initScanner(); clearQuickNameResults(); updateSpesaBanner();
|
||||||
|
// Pre-warm the embedding model the first time user visits scan page
|
||||||
|
if (typeof window._getCategoryPipeline === 'function' && !window._categoryPipelineReady) {
|
||||||
|
window._getCategoryPipeline(); // fire-and-forget
|
||||||
|
}
|
||||||
|
break;
|
||||||
case 'products': loadAllProducts(); break;
|
case 'products': loadAllProducts(); break;
|
||||||
case 'shopping': loadShoppingList(); break;
|
case 'shopping': loadShoppingList(); break;
|
||||||
case 'recipe': loadRecipeArchive(); break;
|
case 'recipe': loadRecipeArchive(); break;
|
||||||
@@ -4470,7 +4575,7 @@ function selectQuickProduct(product) {
|
|||||||
async function createQuickProduct(name) {
|
async function createQuickProduct(name) {
|
||||||
showLoading(true);
|
showLoading(true);
|
||||||
|
|
||||||
// Auto-detect category from name
|
// Auto-detect category from name (sync regex first)
|
||||||
const category = guessCategoryFromName(name);
|
const category = guessCategoryFromName(name);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -4494,6 +4599,27 @@ async function createQuickProduct(name) {
|
|||||||
showLoading(false);
|
showLoading(false);
|
||||||
clearQuickNameResults();
|
clearQuickNameResults();
|
||||||
showToast('Prodotto creato!', 'success');
|
showToast('Prodotto creato!', 'success');
|
||||||
|
|
||||||
|
// If regex gave 'altro', try embedding in background and silently update
|
||||||
|
if (category === 'altro' && typeof classifyCategoryByEmbedding === 'function') {
|
||||||
|
classifyCategoryByEmbedding(name).then(async embCat => {
|
||||||
|
if (!embCat || !result.id) return;
|
||||||
|
try {
|
||||||
|
await api('product_save', {}, 'POST', {
|
||||||
|
id: result.id,
|
||||||
|
name: name,
|
||||||
|
brand: '',
|
||||||
|
category: embCat,
|
||||||
|
unit: 'pz',
|
||||||
|
default_quantity: 1,
|
||||||
|
});
|
||||||
|
if (currentProduct && currentProduct.id === result.id) {
|
||||||
|
currentProduct.category = embCat;
|
||||||
|
}
|
||||||
|
} catch (_) { /* silent */ }
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
showProductAction();
|
showProductAction();
|
||||||
} else {
|
} else {
|
||||||
showLoading(false);
|
showLoading(false);
|
||||||
@@ -4614,6 +4740,20 @@ function autoDetectCategory() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Embedding fallback: async, only when keywords didn't match ──────────
|
||||||
|
// Kick off model load (no-op if already loaded/loading) and update the
|
||||||
|
// select once the result is ready. Only runs when pipeline is available.
|
||||||
|
if (typeof classifyCategoryByEmbedding === 'function') {
|
||||||
|
classifyCategoryByEmbedding(document.getElementById('pf-name').value).then(embCat => {
|
||||||
|
if (!embCat) return;
|
||||||
|
// Re-check manuallySet — user might have picked something while awaiting
|
||||||
|
const sel = document.getElementById('pf-category');
|
||||||
|
if (!sel || sel.dataset.manuallySet === 'true') return;
|
||||||
|
sel.value = embCat;
|
||||||
|
onCategoryChange(true);
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function onCategoryChange(fromAutoDetect = false) {
|
function onCategoryChange(fromAutoDetect = false) {
|
||||||
|
|||||||
+33
@@ -14,6 +14,39 @@
|
|||||||
<link rel="stylesheet" href="assets/css/style.css?v=20260421a">
|
<link rel="stylesheet" href="assets/css/style.css?v=20260421a">
|
||||||
<!-- QuaggaJS for barcode scanning -->
|
<!-- QuaggaJS for barcode scanning -->
|
||||||
<script src="https://cdn.jsdelivr.net/npm/@ericblade/quagga2@1.8.4/dist/quagga.min.js"></script>
|
<script src="https://cdn.jsdelivr.net/npm/@ericblade/quagga2@1.8.4/dist/quagga.min.js"></script>
|
||||||
|
<!-- @xenova/transformers: ES-module bootstrap that exposes a lazily loaded category-classifier pipeline via window._getCategoryPipeline() (the shared promise is kept in window._categoryPipelinePromise) -->
|
||||||
|
<script type="module">
|
||||||
|
// Lazy-load the embedding pipeline only when first needed.
|
||||||
|
// Using a dynamic import so the ~2 MB WASM is not fetched on page load.
|
||||||
|
// Shared state, readable from non-module scripts:
//   _categoryPipelineReady   – flips to true once the pipeline has been built
//   _categoryPipelinePromise – the single in-flight/settled load, or null before first use
window._categoryPipelineReady = false;
window._categoryPipelinePromise = null;

// Imports the library and builds the feature-extraction pipeline.
// Never rejects: any failure is logged and reported as null.
async function _loadCategoryPipeline() {
    try {
        const transformers = await import(
            'https://cdn.jsdelivr.net/npm/@xenova/transformers@2/src/transformers.min.js'
        );
        // Remote model downloads stay enabled; downloaded files are kept in the
        // browser cache so later visits do not fetch them again.
        transformers.env.allowRemoteModels = true;
        transformers.env.useBrowserCache = true;
        const extractor = await transformers.pipeline(
            'feature-extraction',
            'Xenova/all-MiniLM-L6-v2',
            { quantized: true }
        );
        window._categoryPipelineReady = true;
        return extractor;
    } catch (err) {
        console.warn('[EverShelf] Embedding model unavailable, regex fallback only:', err);
        return null;
    }
}

// Returns a promise for the pipeline (or null if loading failed). The first
// call starts the load; every later call reuses the same promise, so a failed
// load is not retried until the page is reloaded.
window._getCategoryPipeline = async function() {
    if (!window._categoryPipelinePromise) {
        window._categoryPipelinePromise = _loadCategoryPipeline();
    }
    return window._categoryPipelinePromise;
};
|
||||||
|
</script>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user