Files
sql/pgvector/pgvector.sql
2025-09-15 07:13:30 +02:00

97 lines
3.8 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- 1. Ajouter les extensions
create extension if not exists vector;
create extension if not exists isn;
-- 2. Créer la table des produits
create table produit (
id bigint primary key,
ean13 EAN13 null,
nom text not null,
marque text null,
categorie text null,
energie int not null,
proteines float4 null,
glucides float4 null,
sucres float4 null,
graisses float4 null,
graisses_saturees float4 null,
sel float4 null,
fibres float4 null,
nutriscore int null,
additifs int null,
additifs_list text[] null,
potassium float null,
calcium float null,
magnesium float null,
sodium float null,
chlorure float null,
sulfate float null,
nitrate float null,
hydrogenocarbonate float null,
silice float null,
fluor float null,
residu float null,
ph float null,
vitamin_a float null,
vitamin_c float null
);
-- 3. ajouter les commentaires
comment on column produit.potassium IS 'K⁺ en mg/L';
comment on column produit.calcium IS 'Ca²⁺ en mg/L';
comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
comment on column produit.sodium IS 'Na⁺ en mg/L';
comment on column produit.chlorure IS 'Cl⁻ en mg/L';
comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
comment on column produit.silice IS 'SiO₂ en mg/L';
comment on column produit.fluor IS 'F en mg/L';
-- 4. importer les produits
\COPY produit FROM '/tmp/produits/cereales_petitdejeuner.csv' (FORMAT CSV, header, ENCODING 'UTF8');
\COPY produit FROM '/tmp/produits/confiture.csv' (FORMAT CSV, header, ENCODING 'UTF8');
\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
-- 5. ajouter une colonne vecteur
alter table produit
add column nutrition vector(10);
-- 6. Création de lindex ivfflat
create index produit_nutrition_hnsw
on produit
using hnsw (nutrition vector_l2_ops)
with (m = 16, ef_construction = 200);
-- 7. calculer la moyenne et l'écart-type pour chaque colonne, puis construire le vecteur :
WITH stats AS (
SELECT
AVG(energie) AS mu_energie, STDDEV_SAMP(energie) AS sigma_energie,
AVG(proteines) AS mu_proteines, STDDEV_SAMP(proteines) AS sigma_proteines,
AVG(glucides) AS mu_glucides, STDDEV_SAMP(glucides) AS sigma_glucides,
AVG(sucres) AS mu_sucres, STDDEV_SAMP(sucres) AS sigma_sucres,
AVG(graisses) AS mu_graisses, STDDEV_SAMP(graisses) AS sigma_graisses,
AVG(graisses_saturees) AS mu_graisses_saturees, STDDEV_SAMP(graisses_saturees) AS sigma_graisses_saturees,
AVG(sel) AS mu_sel, STDDEV_SAMP(sel) AS sigma_sel,
AVG(fibres) AS mu_fibres, STDDEV_SAMP(fibres) AS sigma_fibres,
AVG(nutriscore) AS mu_nutriscore, STDDEV_SAMP(nutriscore) AS sigma_nutriscore,
AVG(additifs) AS mu_additifs, STDDEV_SAMP(additifs) AS sigma_additifs
FROM produit
)
UPDATE produit
SET nutrition = (
SELECT ARRAY[
((COALESCE(energie, mu_energie) - mu_energie) / NULLIF(sigma_energie,0)),
((COALESCE(proteines, mu_proteines) - mu_proteines) / NULLIF(sigma_proteines,0)),
((COALESCE(glucides, mu_glucides) - mu_glucides) / NULLIF(sigma_glucides,0)),
((COALESCE(sucres, mu_sucres) - mu_sucres) / NULLIF(sigma_sucres,0)),
((COALESCE(graisses, mu_graisses) - mu_graisses) / NULLIF(sigma_graisses,0)),
((COALESCE(graisses_saturees, mu_graisses_saturees) - mu_graisses_saturees) / NULLIF(sigma_graisses_saturees,0)),
((COALESCE(sel, mu_sel) - mu_sel) / NULLIF(sigma_sel,0)),
((COALESCE(fibres, mu_fibres) - mu_fibres) / NULLIF(sigma_fibres,0)),
((COALESCE(nutriscore, mu_nutriscore) - mu_nutriscore) / NULLIF(sigma_nutriscore,0)),
((COALESCE(additifs, mu_additifs) - mu_additifs) / NULLIF(sigma_additifs,0))
]::vector
FROM stats
);