Files
sql/pgvector/pgvector.sql

96 lines
3.8 KiB
MySQL
Raw Normal View History

2025-09-14 22:12:09 +02:00
-- 1. Ajouter les extensions
create extension if not exists vector;
create extension if not exists isn;
-- 2. Créer la table des produits
2025-09-14 17:46:21 +02:00
create table produit (
id bigint primary key,
2025-09-14 22:12:09 +02:00
ean13 EAN13 null,
2025-09-14 17:46:21 +02:00
nom text not null,
marque text null,
categorie text null,
energie int not null,
proteines float4 null,
glucides float4 null,
sucres float4 null,
graisses float4 null,
graisses_saturees float4 null,
sel float4 null,
fibres float4 null,
nutriscore int null,
additifs int null,
additifs_list text[] null,
potassium float null,
calcium float null,
magnesium float null,
sodium float null,
chlorure float null,
sulfate float null,
nitrate float null,
hydrogenocarbonate float null,
silice float null,
fluor float null,
residu float null,
ph float null,
vitamin_a float null,
vitamin_c float null
);
2025-09-14 22:12:09 +02:00
-- 3. ajouter les commentaires
2025-09-14 17:46:21 +02:00
comment on column produit.potassium IS 'K⁺ en mg/L';
comment on column produit.calcium IS 'Ca²⁺ en mg/L';
comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
comment on column produit.sodium IS 'Na⁺ en mg/L';
comment on column produit.chlorure IS 'Cl⁻ en mg/L';
comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
2025-09-14 22:12:09 +02:00
comment on column produit.silice IS 'SiO₂ en mg/L';
2025-09-14 17:46:21 +02:00
comment on column produit.fluor IS 'F en mg/L';
\COPY produit FROM '/tmp/produits/cereales_petitdejeuner.csv' (FORMAT CSV, header, ENCODING 'UTF8');
\COPY produit FROM '/tmp/produits/confiture.csv' (FORMAT CSV, header, ENCODING 'UTF8');
\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
2025-09-14 22:12:09 +02:00
-- 5. ajouter une colonne vecteur
alter table produit
2025-09-14 17:46:21 +02:00
add column nutrition vector(10);
2025-09-14 22:12:09 +02:00
-- 6. Création de lindex ivfflat
create index produit_nutrition_hnsw
on produit
using hnsw (nutrition vector_l2_ops)
with (m = 16, ef_construction = 200);
-- 7. calculer la moyenne et l'écart-type pour chaque colonne, puis construire le vecteur :
WITH stats AS (
SELECT
AVG(energie) AS mu_energie, STDDEV_SAMP(energie) AS sigma_energie,
AVG(proteines) AS mu_proteines, STDDEV_SAMP(proteines) AS sigma_proteines,
AVG(glucides) AS mu_glucides, STDDEV_SAMP(glucides) AS sigma_glucides,
AVG(sucres) AS mu_sucres, STDDEV_SAMP(sucres) AS sigma_sucres,
AVG(graisses) AS mu_graisses, STDDEV_SAMP(graisses) AS sigma_graisses,
AVG(graisses_saturees) AS mu_graisses_saturees, STDDEV_SAMP(graisses_saturees) AS sigma_graisses_saturees,
AVG(sel) AS mu_sel, STDDEV_SAMP(sel) AS sigma_sel,
AVG(fibres) AS mu_fibres, STDDEV_SAMP(fibres) AS sigma_fibres,
AVG(nutriscore) AS mu_nutriscore, STDDEV_SAMP(nutriscore) AS sigma_nutriscore,
AVG(additifs) AS mu_additifs, STDDEV_SAMP(additifs) AS sigma_additifs
FROM produit
)
UPDATE produit
SET nutrition = (
SELECT ARRAY[
((COALESCE(energie, mu_energie) - mu_energie) / NULLIF(sigma_energie,0)),
((COALESCE(proteines, mu_proteines) - mu_proteines) / NULLIF(sigma_proteines,0)),
((COALESCE(glucides, mu_glucides) - mu_glucides) / NULLIF(sigma_glucides,0)),
((COALESCE(sucres, mu_sucres) - mu_sucres) / NULLIF(sigma_sucres,0)),
((COALESCE(graisses, mu_graisses) - mu_graisses) / NULLIF(sigma_graisses,0)),
((COALESCE(graisses_saturees, mu_graisses_saturees) - mu_graisses_saturees) / NULLIF(sigma_graisses_saturees,0)),
((COALESCE(sel, mu_sel) - mu_sel) / NULLIF(sigma_sel,0)),
((COALESCE(fibres, mu_fibres) - mu_fibres) / NULLIF(sigma_fibres,0)),
((COALESCE(nutriscore, mu_nutriscore) - mu_nutriscore) / NULLIF(sigma_nutriscore,0)),
((COALESCE(additifs, mu_additifs) - mu_additifs) / NULLIF(sigma_additifs,0))
]::vector
FROM stats
);