kaggle
This commit is contained in:
@@ -27,3 +27,7 @@ psql -h 127.0.0.1 -d sql -U sql -c "\copy (select * from ligne order by id) to '
|
|||||||
psql -h 127.0.0.1 -d sql -U sql -c "\copy (select id, source, target, cost, reverse_cost, ST_AsText(geom) from route order by id) to './route.csv' with csv header"
|
psql -h 127.0.0.1 -d sql -U sql -c "\copy (select id, source, target, cost, reverse_cost, ST_AsText(geom) from route order by id) to './route.csv' with csv header"
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Jeux de données
|
||||||
|
|
||||||
|
- https://www.kaggle.com/datasets/alexandrelemercier/food-detailed-nutritional-content
|
||||||
|
|||||||
@@ -1,44 +0,0 @@
|
|||||||
-- Product table: identity, nutrition values, additive information and
-- mineral-content measurements (units for the minerals are given by the
-- COMMENT ON COLUMN statements that follow this table).
-- Columns without NOT NULL are nullable.
CREATE TABLE produit (
    id                 BIGINT PRIMARY KEY,
    nom                TEXT NOT NULL,
    marque             TEXT,
    categorie          TEXT,
    energie            INTEGER NOT NULL,
    -- single-precision nutrition values
    proteines          REAL,
    glucides           REAL,
    sucres             REAL,
    graisses           REAL,
    graisses_saturees  REAL,
    sel                REAL,
    fibres             REAL,
    nutriscore         INTEGER,
    additifs           INTEGER,
    additifs_list      TEXT[],
    -- double-precision mineral / vitamin values
    potassium          DOUBLE PRECISION,
    calcium            DOUBLE PRECISION,
    magnesium          DOUBLE PRECISION,
    sodium             DOUBLE PRECISION,
    chlorure           DOUBLE PRECISION,
    sulfate            DOUBLE PRECISION,
    nitrate            DOUBLE PRECISION,
    hydrogenocarbonate DOUBLE PRECISION,
    silice             DOUBLE PRECISION,
    fluor              DOUBLE PRECISION,
    residu             DOUBLE PRECISION,
    ph                 DOUBLE PRECISION,
    vitamin_a          DOUBLE PRECISION,
    vitamin_c          DOUBLE PRECISION
);
|
|
||||||
|
|
||||||
-- Attach the chemical species and unit (mg/L) to each mineral column of produit.
comment on column produit.potassium IS 'K⁺ en mg/L';
comment on column produit.calcium IS 'Ca²⁺ en mg/L';
comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
comment on column produit.sodium IS 'Na⁺ en mg/L';
comment on column produit.chlorure IS 'Cl⁻ en mg/L';
comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
-- A stray "s" before the string literal made this statement a syntax error.
comment on column produit.silice IS 'SiO₂ en mg/L';
comment on column produit.fluor IS 'F en mg/L';
|
|
||||||
|
|
||||||
-- Client-side bulk load of produit from a UTF-8 CSV file whose first line is a header.
-- No column list is given, so the CSV fields must follow the table's column order.
\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
|
||||||
|
|||||||
53
pgvector/pgvector.sql
Normal file
53
pgvector/pgvector.sql
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
-- Product table: identity, nutrition values, additive information and
-- mineral-content measurements (units for the minerals are given by the
-- "comment on column" statements that follow this table).
-- The EAN13 type is provided by the PostgreSQL "isn" contrib extension,
-- which must be installed in the database before this statement runs.
create table produit (
    id bigint primary key,
    -- the trailing comma was missing here, which made the whole statement fail to parse
    ean13 EAN13 null,
    nom text not null,
    marque text null,
    categorie text null,
    energie int not null,
    proteines float4 null,
    glucides float4 null,
    sucres float4 null,
    graisses float4 null,
    graisses_saturees float4 null,
    sel float4 null,
    fibres float4 null,
    nutriscore int null,
    additifs int null,
    additifs_list text[] null,
    potassium float null,
    calcium float null,
    magnesium float null,
    sodium float null,
    chlorure float null,
    sulfate float null,
    nitrate float null,
    hydrogenocarbonate float null,
    silice float null,
    fluor float null,
    residu float null,
    ph float null,
    vitamin_a float null,
    vitamin_c float null
);
|
||||||
|
|
||||||
|
-- Attach the chemical species and unit (mg/L) to each mineral column of produit.
comment on column produit.potassium IS 'K⁺ en mg/L';
comment on column produit.calcium IS 'Ca²⁺ en mg/L';
comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
comment on column produit.sodium IS 'Na⁺ en mg/L';
comment on column produit.chlorure IS 'Cl⁻ en mg/L';
comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
-- A stray "s" before the string literal made this statement a syntax error.
comment on column produit.silice IS 'SiO₂ en mg/L';
comment on column produit.fluor IS 'F en mg/L';
|
||||||
|
|
||||||
|
-- Client-side bulk loads of produit from three UTF-8 CSV files, each with a header line.
-- No column list is given, so every CSV must supply its fields in the table's column order.
-- NOTE(review): the psql manual spells this meta-command in lowercase (\copy) - confirm the
-- uppercase form is accepted by the psql version in use.
\COPY produit FROM '/tmp/produits/cereales_petitdejeuner.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
|
\COPY produit FROM '/tmp/produits/confiture.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
|
\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||||
|
|
||||||
|
-- Add a 10-dimension pgvector column to the product table and fill it from the
-- nutrition columns. The statements previously targeted "produits", but the table
-- created and loaded by this script is "produit".
alter table produit
    add column nutrition vector(10);

-- Intentionally unfiltered: every row gets its vector computed.
-- Element order: energie, proteines, glucides, sucres, graisses,
-- graisses_saturees, sel, fibres, nutriscore, additifs.
-- NOTE(review): several of these columns are nullable; confirm how the
-- array-to-vector cast behaves for rows holding NULLs before running on real data.
update produit
set nutrition = ARRAY[energie, proteines, glucides, sucres, graisses, graisses_saturees, sel, fibres, nutriscore, additifs]::vector;
|
||||||
25
pgvector/populate.sql
Normal file
25
pgvector/populate.sql
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
-- Copy into produit the openfoodfacts_cleaned rows whose brand starts with 'Nestlé',
-- that are not already present in produit (anti-join on id) and whose core
-- nutrition values are all known.
-- Each selected column is aliased to its target column name so that the insert
-- maps by name rather than by "select *" position. The insert list previously used
-- graisse / graisse_saturee / glucide / sucre, which are not columns of produit.
-- NOTE(review): produit.nom is NOT NULL but product_name is not filtered here - confirm
-- the source never holds a NULL product_name.
with selection as (
    select
        op.id,
        op.product_name                         as nom,
        op.brands                               as marque,
        op."energy-kcal_100g"                   as energie,
        op.fat_100g                             as graisses,
        op."saturated-fat_100g"                 as graisses_saturees,
        op.carbohydrates_100g                   as glucides,
        op.sugars_100g                          as sucres,
        op.proteins_100g                        as proteines,
        op.salt_100g                            as sel,
        op.fiber_100g                           as fibres,
        op.nutriscore_score                     as nutriscore,
        op.additives_n                          as additifs,
        string_to_array(op.additives_en, ',')   as additifs_list,
        op.potassium_100g                       as potassium,
        op.calcium_100g                         as calcium,
        op."vitamin-a_100g"                     as vitamin_a,
        op."vitamin-c_100g"                     as vitamin_c,
        op.main_category_en                     as categorie
    from openfoodfacts_cleaned op
    left join produit p on p.id = op.id
    where op.brands like 'Nestlé%'
      and p.id is null                          -- keep only products not yet imported
      and op."energy-kcal_100g" is not null
      and op.proteins_100g is not null
      and op.fat_100g is not null
      and op."saturated-fat_100g" is not null
      and op.carbohydrates_100g is not null
      and op.sugars_100g is not null
      and op.salt_100g is not null
      and op.nutriscore_score is not null
)
insert into produit (id, nom, marque,
    energie, graisses, graisses_saturees,
    glucides, sucres, proteines, sel,
    fibres, nutriscore, additifs, additifs_list,
    potassium, calcium, vitamin_a, vitamin_c, categorie)
select id, nom, marque,
    energie, graisses, graisses_saturees,
    glucides, sucres, proteines, sel,
    fibres, nutriscore, additifs, additifs_list,
    potassium, calcium, vitamin_a, vitamin_c, categorie
from selection;
|
||||||
@@ -5,7 +5,8 @@ create table point (
|
|||||||
geom geometry(point, 4326) not null
|
geom geometry(point, 4326) not null
|
||||||
);
|
);
|
||||||
|
|
||||||
create index idx_point_geom on point using gist (geom);
|
create index idx_point_geom
|
||||||
|
on point using gist (geom);
|
||||||
|
|
||||||
create table route (
|
create table route (
|
||||||
id serial primary key,
|
id serial primary key,
|
||||||
@@ -15,7 +16,8 @@ create table route (
|
|||||||
longueur double precision
|
longueur double precision
|
||||||
);
|
);
|
||||||
|
|
||||||
create index idx_route_geom on route using gist (geom);
|
create index idx_route_geom
|
||||||
|
on route using gist (geom);
|
||||||
|
|
||||||
create or replace function maj_longueur()
|
create or replace function maj_longueur()
|
||||||
returns trigger as $$
|
returns trigger as $$
|
||||||
|
|||||||
60
vector.sql
60
vector.sql
@@ -1,60 +0,0 @@
|
|||||||
-- Demo table: a named item with a 6-dimension pgvector feature vector.
create table produits (
    id               serial primary key,
    nom              text,
    caracteristiques vector(6)
);
|
|
||||||
|
|
||||||
-- Seed rows: twenty fruits and vegetables, each with its 6-value feature vector
-- given as a pgvector text literal.
insert into produits
    (nom, caracteristiques)
values
    ('Tomate',     '[0.6, 0.3, 0.3, 1.0, 0.5, 1.0]'),
    ('Concombre',  '[0.2, 0.1, 0.9, 1.0, 0.0, 0.0]'),
    ('Courgette',  '[0.3, 0.1, 0.5, 1.0, 0.0, 0.0]'),
    ('Carotte',    '[0.5, 0.1, 0.8, 0.5, 0.0, 1.0]'),
    ('Poivron',    '[0.4, 0.2, 0.6, 1.0, 0.0, 1.0]'),
    ('Aubergine',  '[0.2, 0.1, 0.4, 1.0, 0.0, 0.0]'),
    ('Pomme',      '[0.8, 0.3, 0.9, 1.0, 1.0, 1.0]'),
    ('Poire',      '[0.7, 0.2, 0.7, 1.0, 1.0, 0.0]'),
    ('Fraise',     '[0.9, 0.4, 0.7, 1.0, 1.0, 1.0]'),
    ('Cerise',     '[0.8, 0.3, 0.6, 1.0, 1.0, 1.0]'),
    ('Citron',     '[0.2, 1.0, 0.6, 1.0, 1.0, 1.0]'),
    ('Orange',     '[0.7, 0.6, 0.7, 1.0, 1.0, 1.0]'),
    ('Banane',     '[0.9, 0.1, 0.2, 1.0, 1.0, 0.0]'),
    ('Raisin',     '[0.8, 0.3, 0.5, 1.0, 1.0, 1.0]'),
    ('Pastèque',   '[0.8, 0.2, 0.6, 1.0, 1.0, 1.0]'),
    ('Melon',      '[0.9, 0.2, 0.5, 1.0, 1.0, 1.0]'),
    ('Betterave',  '[0.5, 0.1, 0.4, 0.0, 0.0, 1.0]'),
    ('Radis',      '[0.2, 0.3, 0.9, 0.5, 0.0, 1.0]'),
    ('Brocoli',    '[0.1, 0.1, 0.6, 0.0, 0.0, 0.0]'),
    ('Chou-fleur', '[0.1, 0.1, 0.7, 0.0, 0.0, 0.0]');
|
|
||||||
|
|
||||||
-- Add a 10-dimension pgvector column and fill it from the nutrition columns.
-- These statements previously targeted "produits", whose only columns are
-- id, nom and caracteristiques; the nutrition columns read below belong to "produit".
alter table produit
    add column nutrition vector(10);

-- Intentionally unfiltered: every row gets its vector computed.
-- Element order: energie, proteines, glucides, sucres, graisses,
-- graisses_saturees, sel, fibres, nutriscore, additifs.
-- NOTE(review): several of these columns are nullable; confirm how the
-- array-to-vector cast behaves for rows holding NULLs before running on real data.
UPDATE produit
SET nutrition = ARRAY[energie, proteines, glucides, sucres, graisses, graisses_saturees, sel, fibres, nutriscore, additifs]::vector;
|
|
||||||
|
|
||||||
|
|
||||||
-- Copy into produit the openfoodfacts_cleaned rows whose brand starts with 'Familia',
-- that are not already present in produit (anti-join on id) and whose core
-- nutrition values are all known.
-- Each selected column is aliased to its target column name so that the insert
-- maps by name rather than by "select *" position. The insert list previously used
-- graisse / graisse_saturee / glucide / sucre, which are not columns of produit.
-- NOTE(review): produit.nom is NOT NULL but product_name is not filtered here - confirm
-- the source never holds a NULL product_name.
with selection as (
    select
        op.id,
        op.product_name                         as nom,
        op.brands                               as marque,
        op."energy-kcal_100g"                   as energie,
        op.fat_100g                             as graisses,
        op."saturated-fat_100g"                 as graisses_saturees,
        op.carbohydrates_100g                   as glucides,
        op.sugars_100g                          as sucres,
        op.proteins_100g                        as proteines,
        op.salt_100g                            as sel,
        op.fiber_100g                           as fibres,
        op.nutriscore_score                     as nutriscore,
        op.additives_n                          as additifs,
        string_to_array(op.additives_en, ',')   as additifs_list,
        op.potassium_100g                       as potassium,
        op.calcium_100g                         as calcium,
        op."vitamin-a_100g"                     as vitamin_a,
        op."vitamin-c_100g"                     as vitamin_c,
        op.main_category_en                     as categorie
    from openfoodfacts_cleaned op
    left join produit p on p.id = op.id
    where op.brands like 'Familia%'
      and p.id is null                          -- keep only products not yet imported
      and op."energy-kcal_100g" is not null
      and op.proteins_100g is not null
      and op.fat_100g is not null
      and op."saturated-fat_100g" is not null
      and op.carbohydrates_100g is not null
      and op.sugars_100g is not null
      and op.salt_100g is not null
      and op.nutriscore_score is not null
)
insert into produit (id, nom, marque,
    energie, graisses, graisses_saturees,
    glucides, sucres, proteines, sel,
    fibres, nutriscore, additifs, additifs_list,
    potassium, calcium, vitamin_a, vitamin_c, categorie)
select id, nom, marque,
    energie, graisses, graisses_saturees,
    glucides, sucres, proteines, sel,
    fibres, nutriscore, additifs, additifs_list,
    potassium, calcium, vitamin_a, vitamin_c, categorie
from selection;
|
|
||||||
Reference in New Issue
Block a user