Review notes (free-text preamble; patch tools skip everything before the first "diff --git" header).

Purpose of this patch: move the product/nutrition scripts under pgvector/, empty
docker-entrypoint-initdb.d/6_produit.sql, delete vector.sql, reflow two index
definitions in postgis/postgis.sql and add a dataset link to the README.

Fixes applied to added ("+") lines only; hunk line counts are unchanged:
  * pgvector/pgvector.sql: added the missing comma after the ean13 column.
  * pgvector/pgvector.sql: removed the stray "s" in front of the silice comment literal.
  * pgvector/pgvector.sql: ALTER TABLE / UPDATE now target "produit", the table this
    script creates (it never creates "produits").
  * pgvector/populate.sql: the INSERT column list now uses the names declared on
    produit (graisses, graisses_saturees, glucides, sucres).

NOTE(review): line breaks and indentation were reconstructed from a whitespace-collapsed
copy of this patch; if context or removed lines do not match, apply with
"git apply --ignore-whitespace".
NOTE(review): the EAN13 and vector types presumably require the "isn" and "vector"
extensions to be created elsewhere -- confirm.
NOTE(review): the \COPY commands have no column list; verify the CSV files carry the
new ean13 column in second position.
NOTE(review): casting an array that contains NULL elements to vector is expected to
fail; several of the source columns are nullable -- confirm the loaded data has none.

diff --git a/README.md b/README.md
index 8ef454f..e8449cf 100644
--- a/README.md
+++ b/README.md
@@ -27,3 +27,7 @@ psql -h 127.0.0.1 -d sql -U sql -c "\copy (select * from ligne order by id) to '
 
 psql -h 127.0.0.1 -d sql -U sql -c "\copy (select id, source, target, cost, reverse_cost, ST_AsText(geom) from route order by id) to './route.csv' with csv header"
 ```
+
+### Jeux de données
+
+- https://www.kaggle.com/datasets/alexandrelemercier/food-detailed-nutritional-content
diff --git a/docker-entrypoint-initdb.d/6_produit.sql b/docker-entrypoint-initdb.d/6_produit.sql
index fef6d77..e69de29 100644
--- a/docker-entrypoint-initdb.d/6_produit.sql
+++ b/docker-entrypoint-initdb.d/6_produit.sql
@@ -1,44 +0,0 @@
-create table produit (
-    id bigint primary key,
-    nom text not null,
-    marque text null,
-    categorie text null,
-    energie int not null,
-    proteines float4 null,
-    glucides float4 null,
-    sucres float4 null,
-    graisses float4 null,
-    graisses_saturees float4 null,
-    sel float4 null,
-    fibres float4 null,
-    nutriscore int null,
-    additifs int null,
-    additifs_list text[] null,
-    potassium float null,
-    calcium float null,
-    magnesium float null,
-    sodium float null,
-    chlorure float null,
-    sulfate float null,
-    nitrate float null,
-    hydrogenocarbonate float null,
-    silice float null,
-    fluor float null,
-    residu float null,
-    ph float null,
-    vitamin_a float null,
-    vitamin_c float null
-);
-
-comment on column produit.potassium IS 'K⁺ en mg/L';
-comment on column produit.calcium IS 'Ca²⁺ en mg/L';
-comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
-comment on column produit.sodium IS 'Na⁺ en mg/L';
-comment on column produit.chlorure IS 'Cl⁻ en mg/L';
-comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
-comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
-comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
-comment on column produit.silice IS s'SiO₂ en mg/L';
-comment on column produit.fluor IS 'F en mg/L';
-
-\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
diff --git a/openfoodfacts.http b/pgvector/openfoodfacts.http
similarity index 100%
rename from openfoodfacts.http
rename to pgvector/openfoodfacts.http
diff --git a/pgvector/pgvector.sql b/pgvector/pgvector.sql
new file mode 100644
index 0000000..ac12bbf
--- /dev/null
+++ b/pgvector/pgvector.sql
@@ -0,0 +1,53 @@
+create table produit (
+    id bigint primary key,
+    ean13 EAN13 null,
+    nom text not null,
+    marque text null,
+    categorie text null,
+    energie int not null,
+    proteines float4 null,
+    glucides float4 null,
+    sucres float4 null,
+    graisses float4 null,
+    graisses_saturees float4 null,
+    sel float4 null,
+    fibres float4 null,
+    nutriscore int null,
+    additifs int null,
+    additifs_list text[] null,
+    potassium float null,
+    calcium float null,
+    magnesium float null,
+    sodium float null,
+    chlorure float null,
+    sulfate float null,
+    nitrate float null,
+    hydrogenocarbonate float null,
+    silice float null,
+    fluor float null,
+    residu float null,
+    ph float null,
+    vitamin_a float null,
+    vitamin_c float null
+);
+
+comment on column produit.potassium IS 'K⁺ en mg/L';
+comment on column produit.calcium IS 'Ca²⁺ en mg/L';
+comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
+comment on column produit.sodium IS 'Na⁺ en mg/L';
+comment on column produit.chlorure IS 'Cl⁻ en mg/L';
+comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
+comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
+comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
+comment on column produit.silice IS 'SiO₂ en mg/L';
+comment on column produit.fluor IS 'F en mg/L';
+
+\COPY produit FROM '/tmp/produits/cereales_petitdejeuner.csv' (FORMAT CSV, header, ENCODING 'UTF8');
+\COPY produit FROM '/tmp/produits/confiture.csv' (FORMAT CSV, header, ENCODING 'UTF8');
+\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
+
+alter table produit
+add column nutrition vector(10);
+
+update produit
+set nutrition = ARRAY[energie, proteines, glucides, sucres, graisses, graisses_saturees, sel, fibres, nutriscore, additifs]::vector;
diff --git a/pgvector/populate.sql b/pgvector/populate.sql
new file mode 100644
index 0000000..76ae880
--- /dev/null
+++ b/pgvector/populate.sql
@@ -0,0 +1,25 @@
+with selection as (
+select op.id, product_name, brands, "energy-kcal_100g", fat_100g, "saturated-fat_100g",
+       "carbohydrates_100g", "sugars_100g", "proteins_100g", "salt_100g", "fiber_100g",
+       "nutriscore_score", "additives_n", string_to_array(additives_en, ','),
+       "potassium_100g", "calcium_100g", "vitamin-a_100g", "vitamin-c_100g",
+       main_category_en
+  from openfoodfacts_cleaned op
+  left join produit p on p.id = op.id
+where brands like 'Nestlé%'
+  and p.id is null
+  and "energy-kcal_100g" is not null
+  and proteins_100g is not null
+  and fat_100g is not null
+  and "saturated-fat_100g" is not null
+  and carbohydrates_100g is not null
+  and sugars_100g is not null
+  and salt_100g is not null
+  and nutriscore_score is not null
+)
+insert into produit (id, nom, marque,
+    energie, graisses, graisses_saturees,
+    glucides, sucres, proteines, sel,
+    fibres, nutriscore, additifs, additifs_list,
+    potassium, calcium, vitamin_a, vitamin_c, categorie)
+select * from selection;
diff --git a/postgis/postgis.sql b/postgis/postgis.sql
index 66d7e10..28b9d2e 100644
--- a/postgis/postgis.sql
+++ b/postgis/postgis.sql
@@ -5,7 +5,8 @@ create table point (
     geom geometry(point, 4326) not null
 );
 
-create index idx_point_geom on point using gist (geom);
+create index idx_point_geom
+    on point using gist (geom);
 
 create table route (
     id serial primary key,
@@ -15,7 +16,8 @@ create table route (
     longueur double precision
 );
 
-create index idx_route_geom on route using gist (geom);
+create index idx_route_geom
+    on route using gist (geom);
 
 create or replace function maj_longueur()
 returns trigger as $$
diff --git a/vector.sql b/vector.sql
deleted file mode 100644
index 3cc3f77..0000000
--- a/vector.sql
+++ /dev/null
@@ -1,60 +0,0 @@
-CREATE TABLE produits (
-    id SERIAL PRIMARY KEY,
-    nom TEXT,
-    caracteristiques VECTOR(6)
-);
-
-INSERT INTO produits (nom, caracteristiques) VALUES
-('Tomate', '[0.6, 0.3, 0.3, 1.0, 0.5, 1.0]'),
-('Concombre', '[0.2, 0.1, 0.9, 1.0, 0.0, 0.0]'),
-('Courgette', '[0.3, 0.1, 0.5, 1.0, 0.0, 0.0]'),
-('Carotte', '[0.5, 0.1, 0.8, 0.5, 0.0, 1.0]'),
-('Poivron', '[0.4, 0.2, 0.6, 1.0, 0.0, 1.0]'),
-('Aubergine', '[0.2, 0.1, 0.4, 1.0, 0.0, 0.0]'),
-('Pomme', '[0.8, 0.3, 0.9, 1.0, 1.0, 1.0]'),
-('Poire', '[0.7, 0.2, 0.7, 1.0, 1.0, 0.0]'),
-('Fraise', '[0.9, 0.4, 0.7, 1.0, 1.0, 1.0]'),
-('Cerise', '[0.8, 0.3, 0.6, 1.0, 1.0, 1.0]'),
-('Citron', '[0.2, 1.0, 0.6, 1.0, 1.0, 1.0]'),
-('Orange', '[0.7, 0.6, 0.7, 1.0, 1.0, 1.0]'),
-('Banane', '[0.9, 0.1, 0.2, 1.0, 1.0, 0.0]'),
-('Raisin', '[0.8, 0.3, 0.5, 1.0, 1.0, 1.0]'),
-('Pastèque', '[0.8, 0.2, 0.6, 1.0, 1.0, 1.0]'),
-('Melon', '[0.9, 0.2, 0.5, 1.0, 1.0, 1.0]'),
-('Betterave', '[0.5, 0.1, 0.4, 0.0, 0.0, 1.0]'),
-('Radis', '[0.2, 0.3, 0.9, 0.5, 0.0, 1.0]'),
-('Brocoli', '[0.1, 0.1, 0.6, 0.0, 0.0, 0.0]'),
-('Chou-fleur', '[0.1, 0.1, 0.7, 0.0, 0.0, 0.0]');
-
-alter table produits
-add column nutrition vector(10);
-
-UPDATE produits
-SET nutrition = ARRAY[energie, proteines, glucides, sucres, graisses, graisses_saturees, sel, fibres, nutriscore, additifs]::vector;
-
-
-with selection as (
-select op.id, product_name, brands, "energy-kcal_100g", fat_100g, "saturated-fat_100g",
-       "carbohydrates_100g", "sugars_100g", "proteins_100g", "salt_100g", "fiber_100g",
-       "nutriscore_score", "additives_n", string_to_array(additives_en, ','),
-       "potassium_100g", "calcium_100g", "vitamin-a_100g", "vitamin-c_100g",
-       main_category_en
-  from openfoodfacts_cleaned op
-  left join produit p on p.id = op.id
-where brands like 'Familia%'
-  and p.id is null
-  and "energy-kcal_100g" is not null
-  and proteins_100g is not null
-  and fat_100g is not null
-  and "saturated-fat_100g" is not null
-  and carbohydrates_100g is not null
-  and sugars_100g is not null
-  and salt_100g is not null
-  and nutriscore_score is not null
-)
-insert into produit (id, nom, marque,
-    energie, graisse, graisse_saturee,
-    glucide, sucre, proteines, sel,
-    fibres, nutriscore, additifs, additifs_list,
-    potassium, calcium, vitamin_a, vitamin_c, categorie)
-select * from selection;