This commit is contained in:
2025-09-14 22:12:09 +02:00
parent 8eaa75ba5f
commit 8d30a0a4bc
11 changed files with 6691 additions and 6582 deletions

4
data/marque.csv Normal file
View File

@@ -0,0 +1,4 @@
id,marque
1,Gerblé
2,Nestlé
3,La Laitière
1 id marque
2 1 Gerblé
3 2 Nestlé
4 3 La Laitière

View File

@@ -1,2 +0,0 @@
id,marque
1,Gerblé
1 id marque
2 1 Gerblé

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,14 +1,14 @@
id,nom,marque,categorie,energie,proteines,glucide,sucre,graisse,graisse_saturee,sel,fibres,nutriscore,additifs,additifs_list,potassium,calcium,magnesium,sodium,chlorure,sulfate,nitrate,hydrogenocarbonate,silice,fluor,residu,ph,vitamin_a,vitamin_c id,ean13,nom,marque,categorie,energie,proteines,glucides,sucres,graisses,graisses_saturees,sel,fibres,nutriscore,additifs,additifs_list,potassium,calcium,magnesium,sodium,chlorure,sulfate,nitrate,hydrogenocarbonate,silice,fluor,residu,ph,vitamin_a,vitamin_c
1401675,Confiture rhubarbe rouge,Biocoop,Jams,234,0.6,57,56,0.5,0.1,0.07,,10,1,"{""E440 - Pectins""}",,,,,,,,,,,,,, 1401675,,Confiture rhubarbe rouge,Biocoop,Jams,234,0.6,57,56,0.5,0.1,0.07,,10,1,"{""E440 - Pectins""}",,,,,,,,,,,,,,
1634268,Confiture dabricot,Biocoop,Jams,232,0.5,64,61,0.5,0.1,0.04,0,11,0,,,,,,,,,,,,,,, 1634268,,Confiture dabricot,Biocoop,Jams,232,0.5,64,61,0.5,0.1,0.04,0,11,0,,,,,,,,,,,,,,,
1634304,Confiture rhubarbe,Biocoop,Jams,231,0.4,56,55.5,0.1,0.1,0.01,2,9,0,,,,,,,,,,,,,,, 1634304,,Confiture rhubarbe,Biocoop,Jams,231,0.4,56,55.5,0.1,0.1,0.01,2,9,0,,,,,,,,,,,,,,,
1634305,Confiture de fraise,Biocoop,Jams,237,0.5,57.6,57.1,0.2,0.1,0.01,1.2,10,0,,,,,,,,,,,,,,, 1634305,,Confiture de fraise,Biocoop,Jams,237,0.5,57.6,57.1,0.2,0.1,0.01,1.2,10,0,,,,,,,,,,,,,,,
1634306,Confiture de fruits rouges,Biocoop,Jams,243,0.6,57.9,56.7,0.2,0.1,0.01,2.9,9,0,,,,,,,,,,,,,,, 1634306,,Confiture de fruits rouges,Biocoop,Jams,243,0.6,57.9,56.7,0.2,0.1,0.01,2.9,9,0,,,,,,,,,,,,,,,
1634490,Confiture de framboise,Biocoop,Jams,242,0.5,63,55,0.5,0.1,0.03,,12,0,,,,,,,,,,,,,,, 1634490,,Confiture de framboise,Biocoop,Jams,242,0.5,63,55,0.5,0.1,0.03,,12,0,,,,,,,,,,,,,,,
1674818,"Confiture orange, citron et pamplemousse",Elibio,Jams,198,0.5,48,48,40.5,0.1,0.05,1.3,10,2,"{""E300 - Ascorbic acid"",""E440 - Pectins""}",,,,,,,,,,,,,, 1674818,,"Confiture orange, citron et pamplemousse",Elibio,Jams,198,0.5,48,48,40.5,0.1,0.05,1.3,10,2,"{""E300 - Ascorbic acid"",""E440 - Pectins""}",,,,,,,,,,,,,,
1674819,Préparation de fraises,Elibio,Jams,178,0.5,45,44,0.5,0.1,0.05,1.3,9,1,"{""E440 - Pectins""}",,,,,,,,,,,,,, 1674819,,Préparation de fraises,Elibio,Jams,178,0.5,45,44,0.5,0.1,0.05,1.3,9,1,"{""E440 - Pectins""}",,,,,,,,,,,,,,
1227545,Confiture de framboise,Gerblé,Jams,82,0.6,15,2.4,0,0,0.04,15,-5,7,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,, 1227545,,Confiture de framboise,Gerblé,Jams,82,0.6,15,2.4,0,0,0.04,15,-5,7,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,,
1228640,Confiture de fraise,Gerblé,Jams,76,0.5,13,3.2,0,0,0.03,14,-6,7,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,, 1228640,,Confiture de fraise,Gerblé,Jams,76,0.5,13,3.2,0,0,0.03,14,-6,7,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,,
1393224,La tomate cerise de marianne,Gerblé,Jams,77,0,15,4,0,0,0.03,12,-6,8,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E300 - Ascorbic acid"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,, 1393224,,La tomate cerise de marianne,Gerblé,Jams,77,0,15,4,0,0,0.03,12,-6,8,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E300 - Ascorbic acid"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,,
3106829,Confiture dabricot,Gerblé,Jams,77,0,15,4,0,0,15,12,4,,,,,,,,,,,,,,,, 3106829,,Confiture dabricot,Gerblé,Jams,77,0,15,4,0,0,15,12,4,,,,,,,,,,,,,,,,
1775102,Confiture de framboise,Mövenpick,Jams,189,0.7,46.2,45.9,0.2,0,0.03,,11,2,"{""E330 - Citric acid"",""E440a - Pectin""}",,,,,,,,,,,,,, 1775102,,Confiture de framboise,Mövenpick,Jams,189,0.7,46.2,45.9,0.2,0,0.03,,11,2,"{""E330 - Citric acid"",""E440a - Pectin""}",,,,,,,,,,,,,,
1 id ean13 nom marque categorie energie proteines glucide glucides sucre sucres graisse graisses graisse_saturee graisses_saturees sel fibres nutriscore additifs additifs_list potassium calcium magnesium sodium chlorure sulfate nitrate hydrogenocarbonate silice fluor residu ph vitamin_a vitamin_c
2 1401675 Confiture rhubarbe rouge Biocoop Jams 234 0.6 57 56 0.5 0.1 0.07 10 1 {"E440 - Pectins"}
3 1634268 Confiture d’abricot Biocoop Jams 232 0.5 64 61 0.5 0.1 0.04 0 11 0
4 1634304 Confiture rhubarbe Biocoop Jams 231 0.4 56 55.5 0.1 0.1 0.01 2 9 0
5 1634305 Confiture de fraise Biocoop Jams 237 0.5 57.6 57.1 0.2 0.1 0.01 1.2 10 0
6 1634306 Confiture de fruits rouges Biocoop Jams 243 0.6 57.9 56.7 0.2 0.1 0.01 2.9 9 0
7 1634490 Confiture de framboise Biocoop Jams 242 0.5 63 55 0.5 0.1 0.03 12 0
8 1674818 Confiture orange, citron et pamplemousse Elibio Jams 198 0.5 48 48 40.5 0.1 0.05 1.3 10 2 {"E300 - Ascorbic acid","E440 - Pectins"}
9 1674819 Préparation de fraises Elibio Jams 178 0.5 45 44 0.5 0.1 0.05 1.3 9 1 {"E440 - Pectins"}
10 1227545 Confiture de framboise Gerblé Jams 82 0.6 15 2.4 0 0 0.04 15 -5 7 {"E1400 - Dextrin","E202 - Potassium sorbate","E330 - Citric acid","E333 - Calcium citrates","E440 - Pectins","E955 - Sucralose","E965 - Maltitol"}
11 1228640 Confiture de fraise Gerblé Jams 76 0.5 13 3.2 0 0 0.03 14 -6 7 {"E1400 - Dextrin","E202 - Potassium sorbate","E330 - Citric acid","E333 - Calcium citrates","E440 - Pectins","E955 - Sucralose","E965 - Maltitol"}
12 1393224 La tomate cerise de marianne Gerblé Jams 77 0 15 4 0 0 0.03 12 -6 8 {"E1400 - Dextrin","E202 - Potassium sorbate","E300 - Ascorbic acid","E330 - Citric acid","E333 - Calcium citrates","E440 - Pectins","E955 - Sucralose","E965 - Maltitol"}
13 3106829 Confiture d’abricot Gerblé Jams 77 0 15 4 0 0 15 12 4
14 1775102 Confiture de framboise Mövenpick Jams 189 0.7 46.2 45.9 0.2 0 0.03 11 2 {"E330 - Citric acid","E440a - Pectin"}

View File

@@ -2,7 +2,6 @@ create extension if not exists ltree;
create extension if not exists pgtap; create extension if not exists pgtap;
create extension if not exists postgis; create extension if not exists postgis;
create extension if not exists pgrouting; create extension if not exists pgrouting;
create extension if not exists vector;
create extension if not exists pgcrypto; create extension if not exists pgcrypto;
create table adherent ( create table adherent (
@@ -46,6 +45,11 @@ create table ligne (
quantite decimal quantite decimal
); );
-- Brand reference table: one row per brand name.
-- No default or sequence is declared for id, so every insert must provide it.
create table marque (
id int primary key,
marque text not null
);
create table fournisseur ( create table fournisseur (
id int primary key, id int primary key,
fournisseur text not null fournisseur text not null

View File

@@ -3,9 +3,14 @@ truncate table famille;
truncate table article; truncate table article;
truncate table ticket; truncate table ticket;
truncate table ligne; truncate table ligne;
truncate table marque;
truncate table fournisseur;
\COPY adherent FROM '/tmp/adherent.csv' (FORMAT CSV, header, ENCODING 'UTF8'); \COPY adherent FROM '/tmp/adherent.csv' (FORMAT CSV, header, ENCODING 'UTF8');
\COPY famille FROM '/tmp/famille.csv' (FORMAT CSV, header, ENCODING 'UTF8'); \COPY famille FROM '/tmp/famille.csv' (FORMAT CSV, header, ENCODING 'UTF8');
\COPY article FROM '/tmp/article.csv' (FORMAT CSV, header, ENCODING 'UTF8'); \COPY article FROM '/tmp/article.csv' (FORMAT CSV, header, ENCODING 'UTF8');
\COPY ticket FROM '/tmp/ticket.csv' (FORMAT CSV, header, ENCODING 'UTF8'); \COPY ticket FROM '/tmp/ticket.csv' (FORMAT CSV, header, ENCODING 'UTF8');
\COPY ligne FROM '/tmp/ligne.csv' (FORMAT CSV, header, ENCODING 'UTF8'); \COPY ligne FROM '/tmp/ligne.csv' (FORMAT CSV, header, ENCODING 'UTF8');
-- Load the brand and supplier reference tables, each from its own CSV export.
-- The supplier table was previously filled from marque.csv (copy-paste slip),
-- which silently stored brand rows as suppliers.
-- NOTE(review): assumes fournisseur.csv is staged in /tmp like the other files — confirm.
\COPY marque FROM '/tmp/marque.csv' (FORMAT CSV, header, ENCODING 'UTF8');
\COPY fournisseur FROM '/tmp/fournisseur.csv' (FORMAT CSV, header, ENCODING 'UTF8');

View File

@@ -0,0 +1 @@
\dx

View File

@@ -0,0 +1,55 @@
Parfait 👍 Alors, puisque vous avez vos vecteurs nutritionnels dans une colonne `nutrition vector` (normalisés avec un Z-score), vous pouvez utiliser l'extension pgvector pour faire une classification KNN directement dans PostgreSQL.
Voici un exemple de requête complète en k-nearest neighbors (kNN) :
-- Supposons que vous avez :
-- table produit(id serial, nom text, famille text, nutrition vector)
-- Exemple : on veut classifier un produit inconnu
```sql
-- Summarise, per family, the k = 5 stored products closest to a new product.
-- The neighbours are selected first (ORDER BY distance + LIMIT) and only then
-- aggregated: mixing COUNT(*)/AVG() with a non-aggregated column and no
-- GROUP BY in one SELECT is rejected by PostgreSQL.
-- NOTE(review): the stored vectors are described as z-score normalised, so the
-- literal below must be normalised with the same per-column mean and standard
-- deviation before querying; the raw per-100 g values are kept here only to
-- document the order of the components — confirm against the loading script.
WITH nouveau AS (
    SELECT
        ARRAY[
            80,   -- energy (kcal/100 g)
            3.5,  -- proteins
            12,   -- carbohydrates
            11,   -- sugars
            2.0,  -- fat
            0.8,  -- saturated fat
            0.1,  -- salt
            1.2,  -- fibre
            5,    -- numeric nutriscore
            0     -- number of additives
        ]::vector AS nutrition
),
plus_proches AS (
    SELECT
        p.famille,
        p.nutrition <-> n.nutrition AS distance  -- Euclidean (L2) distance
    FROM produit AS p
    CROSS JOIN nouveau AS n
    ORDER BY p.nutrition <-> n.nutrition
    LIMIT 5                                      -- keep the 5 nearest neighbours
)
SELECT
    famille,
    COUNT(*) AS voisins,
    ROUND(AVG(distance)::numeric, 3) AS distance_moyenne
FROM plus_proches
GROUP BY famille
ORDER BY voisins DESC, distance_moyenne;
```
Étape suivante : classification majoritaire
Pour prédire la famille (yaourt ou confiture), on peut compter la famille majoritaire parmi les k plus proches voisins :
```sql
-- Majority vote: the predicted family is the most frequent one among the
-- k nearest neighbours of the new product.
WITH nouveau AS (
    SELECT ARRAY[80, 3.5, 12, 11, 2.0, 0.8, 0.1, 1.2, 5, 0]::vector AS nutrition
),
voisins AS (
    -- the k = 5 stored products closest to the new one
    SELECT p.famille
    FROM produit AS p
    CROSS JOIN nouveau AS n
    ORDER BY p.nutrition <-> n.nutrition
    LIMIT 5
)
SELECT
    v.famille,
    COUNT(*) AS occurrences
FROM voisins AS v
GROUP BY v.famille
ORDER BY COUNT(*) DESC
LIMIT 1;  -- predicted family
```

View File

@@ -1,6 +1,11 @@
-- 1. Ajouter les extensions
create extension if not exists vector;
create extension if not exists isn;
-- 2. Créer la table des produits
create table produit ( create table produit (
id bigint primary key, id bigint primary key,
ean13 EAN13 null ean13 EAN13 null,
nom text not null, nom text not null,
marque text null, marque text null,
categorie text null, categorie text null,
@@ -31,6 +36,7 @@ create table produit (
vitamin_c float null vitamin_c float null
); );
-- 3. ajouter les commentaires
comment on column produit.potassium IS 'K⁺ en mg/L'; comment on column produit.potassium IS 'K⁺ en mg/L';
comment on column produit.calcium IS 'Ca²⁺ en mg/L'; comment on column produit.calcium IS 'Ca²⁺ en mg/L';
comment on column produit.magnesium IS 'Mg²⁺ en mg/L'; comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
@@ -39,15 +45,51 @@ comment on column produit.chlorure IS 'Cl⁻ en mg/L';
comment on column produit.sulfate IS 'SO₄²⁻ en mg/L'; comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
comment on column produit.nitrate IS 'NO₃⁻ en mg/L'; comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L'; comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
comment on column produit.silice IS s'SiO₂ en mg/L'; comment on column produit.silice IS 'SiO₂ en mg/L';
comment on column produit.fluor IS 'F en mg/L'; comment on column produit.fluor IS 'F en mg/L';
\COPY produit FROM '/tmp/produits/cereales_petitdejeuner.csv' (FORMAT CSV, header, ENCODING 'UTF8'); \COPY produit FROM '/tmp/produits/cereales_petitdejeuner.csv' (FORMAT CSV, header, ENCODING 'UTF8');
\COPY produit FROM '/tmp/produits/confiture.csv' (FORMAT CSV, header, ENCODING 'UTF8'); \COPY produit FROM '/tmp/produits/confiture.csv' (FORMAT CSV, header, ENCODING 'UTF8');
\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8'); \COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
alter table produits -- 5. ajouter une colonne vecteur
alter table produit
add column nutrition vector(10); add column nutrition vector(10);
update produits -- 6. Création de l'index HNSW
set nutrition = ARRAY[energie, proteines, glucides, sucres, graisses, graisses_saturees, sel, fibres, nutriscore, additifs]::vector; create index produit_nutrition_hnsw
on produit
using hnsw (nutrition vector_l2_ops)
with (m = 16, ef_construction = 200);
-- 7. Build the z-score-normalised nutrition vector of every product.
--    Each of the ten components is (value - column mean) / sample standard deviation.
--    A missing value is replaced by the column mean, i.e. a z-score of 0.
--    A component whose standard deviation is 0 or NULL (constant column, all-NULL
--    column, or fewer than two rows) also becomes 0: pgvector refuses arrays that
--    contain NULL elements, so one NULL component would abort the whole UPDATE.
WITH stats AS (
    SELECT
        AVG(energie)           AS mu_energie,           STDDEV_SAMP(energie)           AS sigma_energie,
        AVG(proteines)         AS mu_proteines,         STDDEV_SAMP(proteines)         AS sigma_proteines,
        AVG(glucides)          AS mu_glucides,          STDDEV_SAMP(glucides)          AS sigma_glucides,
        AVG(sucres)            AS mu_sucres,            STDDEV_SAMP(sucres)            AS sigma_sucres,
        AVG(graisses)          AS mu_graisses,          STDDEV_SAMP(graisses)          AS sigma_graisses,
        AVG(graisses_saturees) AS mu_graisses_saturees, STDDEV_SAMP(graisses_saturees) AS sigma_graisses_saturees,
        AVG(sel)               AS mu_sel,               STDDEV_SAMP(sel)               AS sigma_sel,
        AVG(fibres)            AS mu_fibres,            STDDEV_SAMP(fibres)            AS sigma_fibres,
        AVG(nutriscore)        AS mu_nutriscore,        STDDEV_SAMP(nutriscore)        AS sigma_nutriscore,
        AVG(additifs)          AS mu_additifs,          STDDEV_SAMP(additifs)          AS sigma_additifs
    FROM produit
)
-- No WHERE clause on purpose: the vector of every row is (re)computed.
UPDATE produit AS p
SET nutrition = ARRAY[
        COALESCE((COALESCE(p.energie, s.mu_energie) - s.mu_energie) / NULLIF(s.sigma_energie, 0), 0),
        COALESCE((COALESCE(p.proteines, s.mu_proteines) - s.mu_proteines) / NULLIF(s.sigma_proteines, 0), 0),
        COALESCE((COALESCE(p.glucides, s.mu_glucides) - s.mu_glucides) / NULLIF(s.sigma_glucides, 0), 0),
        COALESCE((COALESCE(p.sucres, s.mu_sucres) - s.mu_sucres) / NULLIF(s.sigma_sucres, 0), 0),
        COALESCE((COALESCE(p.graisses, s.mu_graisses) - s.mu_graisses) / NULLIF(s.sigma_graisses, 0), 0),
        COALESCE((COALESCE(p.graisses_saturees, s.mu_graisses_saturees) - s.mu_graisses_saturees) / NULLIF(s.sigma_graisses_saturees, 0), 0),
        COALESCE((COALESCE(p.sel, s.mu_sel) - s.mu_sel) / NULLIF(s.sigma_sel, 0), 0),
        COALESCE((COALESCE(p.fibres, s.mu_fibres) - s.mu_fibres) / NULLIF(s.sigma_fibres, 0), 0),
        COALESCE((COALESCE(p.nutriscore, s.mu_nutriscore) - s.mu_nutriscore) / NULLIF(s.sigma_nutriscore, 0), 0),
        COALESCE((COALESCE(p.additifs, s.mu_additifs) - s.mu_additifs) / NULLIF(s.sigma_additifs, 0), 0)
    ]::vector
FROM stats AS s;