97 lines
3.8 KiB
SQL
97 lines
3.8 KiB
SQL
-- 1. Ajouter les extensions
|
||
create extension if not exists vector;
|
||
create extension if not exists isn;
|
||
|
||
-- 2. Créer la table des produits
|
||
create table produit (
|
||
id bigint primary key,
|
||
ean13 EAN13 null,
|
||
nom text not null,
|
||
marque text null,
|
||
categorie text null,
|
||
energie int not null,
|
||
proteines float4 null,
|
||
glucides float4 null,
|
||
sucres float4 null,
|
||
graisses float4 null,
|
||
graisses_saturees float4 null,
|
||
sel float4 null,
|
||
fibres float4 null,
|
||
nutriscore int null,
|
||
additifs int null,
|
||
additifs_list text[] null,
|
||
potassium float null,
|
||
calcium float null,
|
||
magnesium float null,
|
||
sodium float null,
|
||
chlorure float null,
|
||
sulfate float null,
|
||
nitrate float null,
|
||
hydrogenocarbonate float null,
|
||
silice float null,
|
||
fluor float null,
|
||
residu float null,
|
||
ph float null,
|
||
vitamin_a float null,
|
||
vitamin_c float null
|
||
);
|
||
|
||
-- 3. ajouter les commentaires
|
||
comment on column produit.potassium IS 'K⁺ en mg/L';
|
||
comment on column produit.calcium IS 'Ca²⁺ en mg/L';
|
||
comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
|
||
comment on column produit.sodium IS 'Na⁺ en mg/L';
|
||
comment on column produit.chlorure IS 'Cl⁻ en mg/L';
|
||
comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
|
||
comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
|
||
comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
|
||
comment on column produit.silice IS 'SiO₂ en mg/L';
|
||
comment on column produit.fluor IS 'F en mg/L';
|
||
|
||
-- 4. importer les produits
|
||
\COPY produit FROM '/tmp/produits/cereales_petitdejeuner.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||
\COPY produit FROM '/tmp/produits/confiture.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||
\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||
|
||
-- 5. ajouter une colonne vecteur
|
||
alter table produit
|
||
add column nutrition vector(10);
|
||
|
||
-- 6. Création de l’index ivfflat
|
||
create index produit_nutrition_hnsw
|
||
on produit
|
||
using hnsw (nutrition vector_l2_ops)
|
||
with (m = 16, ef_construction = 200);
|
||
|
||
-- 7. calculer la moyenne et l'écart-type pour chaque colonne, puis construire le vecteur :
|
||
WITH stats AS (
|
||
SELECT
|
||
AVG(energie) AS mu_energie, STDDEV_SAMP(energie) AS sigma_energie,
|
||
AVG(proteines) AS mu_proteines, STDDEV_SAMP(proteines) AS sigma_proteines,
|
||
AVG(glucides) AS mu_glucides, STDDEV_SAMP(glucides) AS sigma_glucides,
|
||
AVG(sucres) AS mu_sucres, STDDEV_SAMP(sucres) AS sigma_sucres,
|
||
AVG(graisses) AS mu_graisses, STDDEV_SAMP(graisses) AS sigma_graisses,
|
||
AVG(graisses_saturees) AS mu_graisses_saturees, STDDEV_SAMP(graisses_saturees) AS sigma_graisses_saturees,
|
||
AVG(sel) AS mu_sel, STDDEV_SAMP(sel) AS sigma_sel,
|
||
AVG(fibres) AS mu_fibres, STDDEV_SAMP(fibres) AS sigma_fibres,
|
||
AVG(nutriscore) AS mu_nutriscore, STDDEV_SAMP(nutriscore) AS sigma_nutriscore,
|
||
AVG(additifs) AS mu_additifs, STDDEV_SAMP(additifs) AS sigma_additifs
|
||
FROM produit
|
||
)
|
||
UPDATE produit
|
||
SET nutrition = (
|
||
SELECT ARRAY[
|
||
((COALESCE(energie, mu_energie) - mu_energie) / NULLIF(sigma_energie,0)),
|
||
((COALESCE(proteines, mu_proteines) - mu_proteines) / NULLIF(sigma_proteines,0)),
|
||
((COALESCE(glucides, mu_glucides) - mu_glucides) / NULLIF(sigma_glucides,0)),
|
||
((COALESCE(sucres, mu_sucres) - mu_sucres) / NULLIF(sigma_sucres,0)),
|
||
((COALESCE(graisses, mu_graisses) - mu_graisses) / NULLIF(sigma_graisses,0)),
|
||
((COALESCE(graisses_saturees, mu_graisses_saturees) - mu_graisses_saturees) / NULLIF(sigma_graisses_saturees,0)),
|
||
((COALESCE(sel, mu_sel) - mu_sel) / NULLIF(sigma_sel,0)),
|
||
((COALESCE(fibres, mu_fibres) - mu_fibres) / NULLIF(sigma_fibres,0)),
|
||
((COALESCE(nutriscore, mu_nutriscore) - mu_nutriscore) / NULLIF(sigma_nutriscore,0)),
|
||
((COALESCE(additifs, mu_additifs) - mu_additifs) / NULLIF(sigma_additifs,0))
|
||
]::vector
|
||
FROM stats
|
||
);
|