ean13
This commit is contained in:
4
data/marque.csv
Normal file
4
data/marque.csv
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
id,marque
|
||||||
|
1,Gerblé
|
||||||
|
2,Nestlé
|
||||||
|
3,La Laitière
|
||||||
|
@@ -1,2 +0,0 @@
|
|||||||
id,marque
|
|
||||||
1,Gerblé
|
|
||||||
|
10474
data/produit.csv
10474
data/produit.csv
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,14 +1,14 @@
|
|||||||
id,nom,marque,categorie,energie,proteines,glucide,sucre,graisse,graisse_saturee,sel,fibres,nutriscore,additifs,additifs_list,potassium,calcium,magnesium,sodium,chlorure,sulfate,nitrate,hydrogenocarbonate,silice,fluor,residu,ph,vitamin_a,vitamin_c
|
id,ean13,nom,marque,categorie,energie,proteines,glucides,sucres,graisses,graisses_saturees,sel,fibres,nutriscore,additifs,additifs_list,potassium,calcium,magnesium,sodium,chlorure,sulfate,nitrate,hydrogenocarbonate,silice,fluor,residu,ph,vitamin_a,vitamin_c
|
||||||
1401675,Confiture rhubarbe rouge,Biocoop,Jams,234,0.6,57,56,0.5,0.1,0.07,,10,1,"{""E440 - Pectins""}",,,,,,,,,,,,,,
|
1401675,,Confiture rhubarbe rouge,Biocoop,Jams,234,0.6,57,56,0.5,0.1,0.07,,10,1,"{""E440 - Pectins""}",,,,,,,,,,,,,,
|
||||||
1634268,Confiture d’abricot,Biocoop,Jams,232,0.5,64,61,0.5,0.1,0.04,0,11,0,,,,,,,,,,,,,,,
|
1634268,,Confiture d’abricot,Biocoop,Jams,232,0.5,64,61,0.5,0.1,0.04,0,11,0,,,,,,,,,,,,,,,
|
||||||
1634304,Confiture rhubarbe,Biocoop,Jams,231,0.4,56,55.5,0.1,0.1,0.01,2,9,0,,,,,,,,,,,,,,,
|
1634304,,Confiture rhubarbe,Biocoop,Jams,231,0.4,56,55.5,0.1,0.1,0.01,2,9,0,,,,,,,,,,,,,,,
|
||||||
1634305,Confiture de fraise,Biocoop,Jams,237,0.5,57.6,57.1,0.2,0.1,0.01,1.2,10,0,,,,,,,,,,,,,,,
|
1634305,,Confiture de fraise,Biocoop,Jams,237,0.5,57.6,57.1,0.2,0.1,0.01,1.2,10,0,,,,,,,,,,,,,,,
|
||||||
1634306,Confiture de fruits rouges,Biocoop,Jams,243,0.6,57.9,56.7,0.2,0.1,0.01,2.9,9,0,,,,,,,,,,,,,,,
|
1634306,,Confiture de fruits rouges,Biocoop,Jams,243,0.6,57.9,56.7,0.2,0.1,0.01,2.9,9,0,,,,,,,,,,,,,,,
|
||||||
1634490,Confiture de framboise,Biocoop,Jams,242,0.5,63,55,0.5,0.1,0.03,,12,0,,,,,,,,,,,,,,,
|
1634490,,Confiture de framboise,Biocoop,Jams,242,0.5,63,55,0.5,0.1,0.03,,12,0,,,,,,,,,,,,,,,
|
||||||
1674818,"Confiture orange, citron et pamplemousse",Elibio,Jams,198,0.5,48,48,40.5,0.1,0.05,1.3,10,2,"{""E300 - Ascorbic acid"",""E440 - Pectins""}",,,,,,,,,,,,,,
|
1674818,,"Confiture orange, citron et pamplemousse",Elibio,Jams,198,0.5,48,48,40.5,0.1,0.05,1.3,10,2,"{""E300 - Ascorbic acid"",""E440 - Pectins""}",,,,,,,,,,,,,,
|
||||||
1674819,Préparation de fraises,Elibio,Jams,178,0.5,45,44,0.5,0.1,0.05,1.3,9,1,"{""E440 - Pectins""}",,,,,,,,,,,,,,
|
1674819,,Préparation de fraises,Elibio,Jams,178,0.5,45,44,0.5,0.1,0.05,1.3,9,1,"{""E440 - Pectins""}",,,,,,,,,,,,,,
|
||||||
1227545,Confiture de framboise,Gerblé,Jams,82,0.6,15,2.4,0,0,0.04,15,-5,7,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,,
|
1227545,,Confiture de framboise,Gerblé,Jams,82,0.6,15,2.4,0,0,0.04,15,-5,7,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,,
|
||||||
1228640,Confiture de fraise,Gerblé,Jams,76,0.5,13,3.2,0,0,0.03,14,-6,7,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,,
|
1228640,,Confiture de fraise,Gerblé,Jams,76,0.5,13,3.2,0,0,0.03,14,-6,7,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,,
|
||||||
1393224,La tomate cerise de marianne,Gerblé,Jams,77,0,15,4,0,0,0.03,12,-6,8,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E300 - Ascorbic acid"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,,
|
1393224,,La tomate cerise de marianne,Gerblé,Jams,77,0,15,4,0,0,0.03,12,-6,8,"{""E1400 - Dextrin"",""E202 - Potassium sorbate"",""E300 - Ascorbic acid"",""E330 - Citric acid"",""E333 - Calcium citrates"",""E440 - Pectins"",""E955 - Sucralose"",""E965 - Maltitol""}",,,,,,,,,,,,,,
|
||||||
3106829,Confiture d’abricot,Gerblé,Jams,77,0,15,4,0,0,15,12,4,,,,,,,,,,,,,,,,
|
3106829,,Confiture d’abricot,Gerblé,Jams,77,0,15,4,0,0,15,12,4,,,,,,,,,,,,,,,,
|
||||||
1775102,Confiture de framboise,Mövenpick,Jams,189,0.7,46.2,45.9,0.2,0,0.03,,11,2,"{""E330 - Citric acid"",""E440a - Pectin""}",,,,,,,,,,,,,,
|
1775102,,Confiture de framboise,Mövenpick,Jams,189,0.7,46.2,45.9,0.2,0,0.03,,11,2,"{""E330 - Citric acid"",""E440a - Pectin""}",,,,,,,,,,,,,,
|
||||||
|
|||||||
|
@@ -2,7 +2,6 @@ create extension if not exists ltree;
|
|||||||
create extension if not exists pgtap;
|
create extension if not exists pgtap;
|
||||||
create extension if not exists postgis;
|
create extension if not exists postgis;
|
||||||
create extension if not exists pgrouting;
|
create extension if not exists pgrouting;
|
||||||
create extension if not exists vector;
|
|
||||||
create extension if not exists pgcrypto;
|
create extension if not exists pgcrypto;
|
||||||
|
|
||||||
create table adherent (
|
create table adherent (
|
||||||
@@ -46,6 +45,11 @@ create table ligne (
|
|||||||
quantite decimal
|
quantite decimal
|
||||||
);
|
);
|
||||||
|
|
||||||
|
create table marque (
|
||||||
|
id int primary key,
|
||||||
|
marque text not null
|
||||||
|
);
|
||||||
|
|
||||||
create table fournisseur (
|
create table fournisseur (
|
||||||
id int primary key,
|
id int primary key,
|
||||||
fournisseur text not null
|
fournisseur text not null
|
||||||
|
|||||||
@@ -3,9 +3,14 @@ truncate table famille;
|
|||||||
truncate table article;
|
truncate table article;
|
||||||
truncate table ticket;
|
truncate table ticket;
|
||||||
truncate table ligne;
|
truncate table ligne;
|
||||||
|
truncate table marque;
|
||||||
|
truncate table fournisseur;
|
||||||
|
|
||||||
\COPY adherent FROM '/tmp/adherent.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
\COPY adherent FROM '/tmp/adherent.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
\COPY famille FROM '/tmp/famille.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
\COPY famille FROM '/tmp/famille.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
\COPY article FROM '/tmp/article.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
\COPY article FROM '/tmp/article.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
\COPY ticket FROM '/tmp/ticket.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
\COPY ticket FROM '/tmp/ticket.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
\COPY ligne FROM '/tmp/ligne.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
\COPY ligne FROM '/tmp/ligne.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
|
|
||||||
|
\COPY marque FROM '/tmp/marque.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
|
-- Load suppliers from their own CSV. The previous path was '/tmp/marque.csv',
-- the same file already loaded into marque just above, so fournisseur
-- received the brand rows.
-- NOTE(review): assumes a fournisseur.csv is mounted under /tmp like the other CSVs; confirm.
\COPY fournisseur FROM '/tmp/fournisseur.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
|
|||||||
1
docker-entrypoint-initdb.d/6_end.sql
Normal file
1
docker-entrypoint-initdb.d/6_end.sql
Normal file
@@ -0,0 +1 @@
|
|||||||
|
\dx
|
||||||
55
pgvector/classification.md
Normal file
55
pgvector/classification.md
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
Parfait 👍 Alors, puisque vous avez vos vecteurs nutritionnels dans une colonne nutrition vector (normalisés avec un Z-score), vous pouvez utiliser l’extension pgvector pour faire une classification KNN directement dans PostgreSQL.
|
||||||
|
|
||||||
|
Voici un exemple de requête complète en k-nearest neighbors (kNN) :
|
||||||
|
|
||||||
|
-- Supposons que vous avez :
|
||||||
|
-- table produit(id serial, nom text, famille text, nutrition vector)
|
||||||
|
|
||||||
|
-- Exemple : on veut classifier un produit inconnu
|
||||||
|
|
||||||
|
```sql
|
||||||
|
WITH nouveau AS (
|
||||||
|
SELECT
|
||||||
|
ARRAY[
|
||||||
|
80, -- énergie (kcal/100g)
|
||||||
|
3.5, -- protéines
|
||||||
|
12, -- glucides
|
||||||
|
11, -- sucres
|
||||||
|
2.0, -- graisses
|
||||||
|
0.8, -- graisses_saturées
|
||||||
|
0.1, -- sel
|
||||||
|
1.2, -- fibres
|
||||||
|
5, -- nutriscore numérique
|
||||||
|
0 -- nombre d'additifs
|
||||||
|
]::vector AS nutrition
|
||||||
|
)
|
||||||
|
-- List each of the k nearest neighbours with its own distance.
-- The earlier SELECT list mixed p.famille with COUNT(*)/AVG(...) without a
-- GROUP BY while ordering by an ungrouped expression, which PostgreSQL rejects.
SELECT p.nom,
       p.famille,
       ROUND((p.nutrition <-> n.nutrition)::numeric, 3) AS distance
|
||||||
|
FROM produit p
|
||||||
|
JOIN nouveau n ON true
|
||||||
|
ORDER BY p.nutrition <-> n.nutrition -- tri par distance euclidienne
|
||||||
|
LIMIT 5; -- on récupère les 5 plus proches voisins
|
||||||
|
```
|
||||||
|
|
||||||
|
Étape suivante : classification majoritaire

Pour prédire la famille (yaourt ou confiture), on peut compter la famille majoritaire parmi les k plus proches voisins :

```sql
WITH nouveau AS (
|
||||||
|
SELECT ARRAY[80, 3.5, 12, 11, 2.0, 0.8, 0.1, 1.2, 5, 0]::vector AS nutrition
|
||||||
|
),
|
||||||
|
voisins AS (
|
||||||
|
SELECT p.famille
|
||||||
|
FROM produit p
|
||||||
|
JOIN nouveau n ON true
|
||||||
|
ORDER BY p.nutrition <-> n.nutrition
|
||||||
|
LIMIT 5 -- k=5
|
||||||
|
)
|
||||||
|
SELECT famille, COUNT(*) AS occurrences
|
||||||
|
FROM voisins
|
||||||
|
GROUP BY famille
|
||||||
|
ORDER BY occurrences DESC
|
||||||
|
LIMIT 1; -- famille prédite
|
||||||
|
```
|
||||||
@@ -1,6 +1,11 @@
|
|||||||
|
-- 1. Ajouter les extensions
|
||||||
|
create extension if not exists vector;
|
||||||
|
create extension if not exists isn;
|
||||||
|
|
||||||
|
-- 2. Créer la table des produits
|
||||||
create table produit (
|
create table produit (
|
||||||
id bigint primary key,
|
id bigint primary key,
|
||||||
ean13 EAN13 null
|
ean13 EAN13 null,
|
||||||
nom text not null,
|
nom text not null,
|
||||||
marque text null,
|
marque text null,
|
||||||
categorie text null,
|
categorie text null,
|
||||||
@@ -31,6 +36,7 @@ create table produit (
|
|||||||
vitamin_c float null
|
vitamin_c float null
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- 3. ajouter les commentaires
|
||||||
comment on column produit.potassium IS 'K⁺ en mg/L';
|
comment on column produit.potassium IS 'K⁺ en mg/L';
|
||||||
comment on column produit.calcium IS 'Ca²⁺ en mg/L';
|
comment on column produit.calcium IS 'Ca²⁺ en mg/L';
|
||||||
comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
|
comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
|
||||||
@@ -39,15 +45,51 @@ comment on column produit.chlorure IS 'Cl⁻ en mg/L';
|
|||||||
comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
|
comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
|
||||||
comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
|
comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
|
||||||
comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
|
comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
|
||||||
comment on column produit.silice IS s'SiO₂ en mg/L';
|
comment on column produit.silice IS 'SiO₂ en mg/L';
|
||||||
comment on column produit.fluor IS 'F en mg/L';
|
comment on column produit.fluor IS 'F en mg/L';
|
||||||
|
|
||||||
\COPY produit FROM '/tmp/produits/cereales_petitdejeuner.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
\COPY produit FROM '/tmp/produits/cereales_petitdejeuner.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
\COPY produit FROM '/tmp/produits/confiture.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
\COPY produit FROM '/tmp/produits/confiture.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
|
|
||||||
alter table produits
|
-- 5. ajouter une colonne vecteur
|
||||||
|
alter table produit
|
||||||
add column nutrition vector(10);
|
add column nutrition vector(10);
|
||||||
|
|
||||||
update produits
|
-- 6. Création de l’index HNSW
|
||||||
set nutrition = ARRAY[energie, proteines, glucides, sucres, graisses, graisses_saturees, sel, fibres, nutriscore, additifs]::vector;
|
create index produit_nutrition_hnsw
|
||||||
|
on produit
|
||||||
|
using hnsw (nutrition vector_l2_ops)
|
||||||
|
with (m = 16, ef_construction = 200);
|
||||||
|
|
||||||
|
-- 7. Populate produit.nutrition with a z-score normalised 10-dimension vector.
--
-- stats: one row holding the mean (mu_*) and sample standard deviation
-- (sigma_*) of every nutritional column over the whole produit table.
WITH stats AS (
    SELECT
        AVG(energie)           AS mu_energie,           STDDEV_SAMP(energie)           AS sigma_energie,
        AVG(proteines)         AS mu_proteines,         STDDEV_SAMP(proteines)         AS sigma_proteines,
        AVG(glucides)          AS mu_glucides,          STDDEV_SAMP(glucides)          AS sigma_glucides,
        AVG(sucres)            AS mu_sucres,            STDDEV_SAMP(sucres)            AS sigma_sucres,
        AVG(graisses)          AS mu_graisses,          STDDEV_SAMP(graisses)          AS sigma_graisses,
        AVG(graisses_saturees) AS mu_graisses_saturees, STDDEV_SAMP(graisses_saturees) AS sigma_graisses_saturees,
        AVG(sel)               AS mu_sel,               STDDEV_SAMP(sel)               AS sigma_sel,
        AVG(fibres)            AS mu_fibres,            STDDEV_SAMP(fibres)            AS sigma_fibres,
        AVG(nutriscore)        AS mu_nutriscore,        STDDEV_SAMP(nutriscore)        AS sigma_nutriscore,
        AVG(additifs)          AS mu_additifs,          STDDEV_SAMP(additifs)          AS sigma_additifs
    FROM produit
)
-- Intentionally no WHERE clause: every product gets a vector.
-- For each component:
--   * a missing value is replaced by the column mean, i.e. a z-score of 0;
--   * NULLIF(sigma, 0) avoids a division by zero;
--   * the outer COALESCE(..., 0) turns the resulting NULL (zero or undefined
--     sigma, or a column with no values at all) into 0, because the cast to
--     vector fails on arrays that contain NULL elements.
UPDATE produit AS p
SET nutrition = ARRAY[
        COALESCE((COALESCE(p.energie,           s.mu_energie)           - s.mu_energie)           / NULLIF(s.sigma_energie, 0),           0),
        COALESCE((COALESCE(p.proteines,         s.mu_proteines)         - s.mu_proteines)         / NULLIF(s.sigma_proteines, 0),         0),
        COALESCE((COALESCE(p.glucides,          s.mu_glucides)          - s.mu_glucides)          / NULLIF(s.sigma_glucides, 0),          0),
        COALESCE((COALESCE(p.sucres,            s.mu_sucres)            - s.mu_sucres)            / NULLIF(s.sigma_sucres, 0),            0),
        COALESCE((COALESCE(p.graisses,          s.mu_graisses)          - s.mu_graisses)          / NULLIF(s.sigma_graisses, 0),          0),
        COALESCE((COALESCE(p.graisses_saturees, s.mu_graisses_saturees) - s.mu_graisses_saturees) / NULLIF(s.sigma_graisses_saturees, 0), 0),
        COALESCE((COALESCE(p.sel,               s.mu_sel)               - s.mu_sel)               / NULLIF(s.sigma_sel, 0),               0),
        COALESCE((COALESCE(p.fibres,            s.mu_fibres)            - s.mu_fibres)            / NULLIF(s.sigma_fibres, 0),            0),
        COALESCE((COALESCE(p.nutriscore,        s.mu_nutriscore)        - s.mu_nutriscore)        / NULLIF(s.sigma_nutriscore, 0),        0),
        COALESCE((COALESCE(p.additifs,          s.mu_additifs)          - s.mu_additifs)          / NULLIF(s.sigma_additifs, 0),          0)
    ]::vector
FROM stats AS s;
|
||||||
|
|||||||
Reference in New Issue
Block a user