kaggle
This commit is contained in:
@@ -27,3 +27,7 @@ psql -h 127.0.0.1 -d sql -U sql -c "\copy (select * from ligne order by id) to '
|
||||
psql -h 127.0.0.1 -d sql -U sql -c "\copy (select id, source, target, cost, reverse_cost, ST_AsText(geom) from route order by id) to './route.csv' with csv header"
|
||||
|
||||
```
|
||||
|
||||
### Jeux de données
|
||||
|
||||
- https://www.kaggle.com/datasets/alexandrelemercier/food-detailed-nutritional-content
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
-- Product table: identity/labelling columns, macro-nutrient values,
-- additive information, then mineral and vitamin contents.
-- Only id, nom and energie are mandatory; every other column is nullable.
create table produit (
    id                  bigint            primary key,
    nom                 text              not null,
    marque              text,
    categorie           text,
    energie             integer           not null,
    proteines           real,
    glucides            real,
    sucres              real,
    graisses            real,
    graisses_saturees   real,
    sel                 real,
    fibres              real,
    nutriscore          integer,
    additifs            integer,
    additifs_list       text[],
    potassium           double precision,
    calcium             double precision,
    magnesium           double precision,
    sodium              double precision,
    chlorure            double precision,
    sulfate             double precision,
    nitrate             double precision,
    hydrogenocarbonate  double precision,
    silice              double precision,
    fluor               double precision,
    residu              double precision,
    ph                  double precision,
    vitamin_a           double precision,
    vitamin_c           double precision
);
|
||||
|
||||
comment on column produit.potassium IS 'K⁺ en mg/L';
|
||||
comment on column produit.calcium IS 'Ca²⁺ en mg/L';
|
||||
comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
|
||||
comment on column produit.sodium IS 'Na⁺ en mg/L';
|
||||
comment on column produit.chlorure IS 'Cl⁻ en mg/L';
|
||||
comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
|
||||
comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
|
||||
comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
|
||||
-- Column comment for the silica content (SiO₂), expressed in mg/L.
-- The literal must start directly with a quote: a stray prefix letter
-- before it is a syntax error.
comment on column produit.silice IS 'SiO₂ en mg/L';
|
||||
comment on column produit.fluor IS 'F en mg/L';
|
||||
|
||||
-- psql meta-command: bulk-load produit from a CSV file read on the client side.
-- The file has a header row and is UTF-8 encoded. No column list is given,
-- so the CSV columns must appear in the same order as the table's columns.
\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||
|
||||
53
pgvector/pgvector.sql
Normal file
53
pgvector/pgvector.sql
Normal file
@@ -0,0 +1,53 @@
|
||||
-- Product table: identity/labelling columns, macro-nutrient values,
-- additive information, then mineral and vitamin contents.
-- Only id, nom and energie are mandatory; every other column is nullable.
create table produit (
    id bigint primary key,
    -- EAN13 is not a core type; NOTE(review): presumably provided by the
    -- "isn" extension, which must then be installed before this runs.
    ean13 EAN13 null,
    nom text not null,
    marque text null,
    categorie text null,
    energie int not null,
    proteines float4 null,
    glucides float4 null,
    sucres float4 null,
    graisses float4 null,
    graisses_saturees float4 null,
    sel float4 null,
    fibres float4 null,
    nutriscore int null,
    additifs int null,
    additifs_list text[] null,
    potassium float null,
    calcium float null,
    magnesium float null,
    sodium float null,
    chlorure float null,
    sulfate float null,
    nitrate float null,
    hydrogenocarbonate float null,
    silice float null,
    fluor float null,
    residu float null,
    ph float null,
    vitamin_a float null,
    vitamin_c float null
);
|
||||
|
||||
comment on column produit.potassium IS 'K⁺ en mg/L';
|
||||
comment on column produit.calcium IS 'Ca²⁺ en mg/L';
|
||||
comment on column produit.magnesium IS 'Mg²⁺ en mg/L';
|
||||
comment on column produit.sodium IS 'Na⁺ en mg/L';
|
||||
comment on column produit.chlorure IS 'Cl⁻ en mg/L';
|
||||
comment on column produit.sulfate IS 'SO₄²⁻ en mg/L';
|
||||
comment on column produit.nitrate IS 'NO₃⁻ en mg/L';
|
||||
comment on column produit.hydrogenocarbonate IS 'HCO₃⁻ en mg/L';
|
||||
-- Column comment for the silica content (SiO₂), expressed in mg/L.
-- The literal must start directly with a quote: a stray prefix letter
-- before it is a syntax error.
comment on column produit.silice IS 'SiO₂ en mg/L';
|
||||
comment on column produit.fluor IS 'F en mg/L';
|
||||
|
||||
-- psql meta-commands: bulk-load produit from three client-side CSV files
-- (header row, UTF-8). No column list is given, so each file's columns must
-- appear in the same order as the table's columns.
-- NOTE(review): confirm the files match the current column order of produit.
\COPY produit FROM '/tmp/produits/cereales_petitdejeuner.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||
\COPY produit FROM '/tmp/produits/confiture.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||
\COPY produit FROM '/tmp/produit.csv' (FORMAT CSV, header, ENCODING 'UTF8');
|
||||
|
||||
-- Add a 10-dimensional vector column holding each product's nutrition profile.
-- The "vector" type comes from the pgvector extension, which must be installed.
alter table produit
    add column nutrition vector(10);

-- Fill the vector from the nutrition columns, in this fixed order:
--   energie, proteines, glucides, sucres, graisses,
--   graisses_saturees, sel, fibres, nutriscore, additifs
-- The array-to-vector cast rejects arrays that contain NULL, and several of
-- these columns are nullable, so only fully populated rows are updated;
-- the others keep nutrition = NULL instead of aborting the whole statement.
update produit
set nutrition = array[
        energie, proteines, glucides, sucres, graisses,
        graisses_saturees, sel, fibres, nutriscore, additifs
    ]::vector
where num_nulls(
        energie, proteines, glucides, sucres, graisses,
        graisses_saturees, sel, fibres, nutriscore, additifs
    ) = 0;
|
||||
25
pgvector/populate.sql
Normal file
25
pgvector/populate.sql
Normal file
@@ -0,0 +1,25 @@
|
||||
-- Import into produit the rows of openfoodfacts_cleaned whose brand starts
-- with 'Nestlé' and whose id is not already present in produit.
-- Rows missing any of the core nutrition values (energy, proteins, fat,
-- saturated fat, carbohydrates, sugars, salt, nutriscore) are skipped.
with selection as (
    -- Each source column is aliased to the produit column it feeds, so the
    -- mapping is explicit rather than positional.
    select
        op.id                                  as id,
        op.product_name                        as nom,
        op.brands                              as marque,
        op."energy-kcal_100g"                  as energie,
        op.fat_100g                            as graisses,
        op."saturated-fat_100g"                as graisses_saturees,
        op.carbohydrates_100g                  as glucides,
        op.sugars_100g                         as sucres,
        op.proteins_100g                       as proteines,
        op.salt_100g                           as sel,
        op.fiber_100g                          as fibres,
        op.nutriscore_score                    as nutriscore,
        op.additives_n                         as additifs,
        string_to_array(op.additives_en, ',')  as additifs_list,
        op.potassium_100g                      as potassium,
        op.calcium_100g                        as calcium,
        op."vitamin-a_100g"                    as vitamin_a,
        op."vitamin-c_100g"                    as vitamin_c,
        op.main_category_en                    as categorie
    from openfoodfacts_cleaned op
    left join produit p on p.id = op.id
    where op.brands like 'Nestlé%'
      and p.id is null  -- anti-join: keep only products not yet imported
      and op."energy-kcal_100g" is not null
      and op.proteins_100g is not null
      and op.fat_100g is not null
      and op."saturated-fat_100g" is not null
      and op.carbohydrates_100g is not null
      and op.sugars_100g is not null
      and op.salt_100g is not null
      and op.nutriscore_score is not null
)
insert into produit (
    id, nom, marque,
    energie, graisses, graisses_saturees,
    glucides, sucres, proteines, sel,
    fibres, nutriscore, additifs, additifs_list,
    potassium, calcium, vitamin_a, vitamin_c, categorie
)
select
    id, nom, marque,
    energie, graisses, graisses_saturees,
    glucides, sucres, proteines, sel,
    fibres, nutriscore, additifs, additifs_list,
    potassium, calcium, vitamin_a, vitamin_c, categorie
from selection;
|
||||
@@ -5,7 +5,8 @@ create table point (
|
||||
geom geometry(point, 4326) not null
|
||||
);
|
||||
|
||||
create index idx_point_geom on point using gist (geom);
|
||||
create index idx_point_geom
|
||||
on point using gist (geom);
|
||||
|
||||
create table route (
|
||||
id serial primary key,
|
||||
@@ -15,7 +16,8 @@ create table route (
|
||||
longueur double precision
|
||||
);
|
||||
|
||||
create index idx_route_geom on route using gist (geom);
|
||||
create index idx_route_geom
|
||||
on route using gist (geom);
|
||||
|
||||
create or replace function maj_longueur()
|
||||
returns trigger as $$
|
||||
|
||||
60
vector.sql
60
vector.sql
@@ -1,60 +0,0 @@
|
||||
-- Demo table: each named product carries a 6-dimensional pgvector value.
create table produits (
    id               serial primary key,
    nom              text,
    caracteristiques vector(6)
);
|
||||
|
||||
-- Seed data: twenty fruits and vegetables, each with a 6-component vector.
insert into produits (nom, caracteristiques)
values
    ('Tomate', '[0.6, 0.3, 0.3, 1.0, 0.5, 1.0]'),
    ('Concombre', '[0.2, 0.1, 0.9, 1.0, 0.0, 0.0]'),
    ('Courgette', '[0.3, 0.1, 0.5, 1.0, 0.0, 0.0]'),
    ('Carotte', '[0.5, 0.1, 0.8, 0.5, 0.0, 1.0]'),
    ('Poivron', '[0.4, 0.2, 0.6, 1.0, 0.0, 1.0]'),
    ('Aubergine', '[0.2, 0.1, 0.4, 1.0, 0.0, 0.0]'),
    ('Pomme', '[0.8, 0.3, 0.9, 1.0, 1.0, 1.0]'),
    ('Poire', '[0.7, 0.2, 0.7, 1.0, 1.0, 0.0]'),
    ('Fraise', '[0.9, 0.4, 0.7, 1.0, 1.0, 1.0]'),
    ('Cerise', '[0.8, 0.3, 0.6, 1.0, 1.0, 1.0]'),
    ('Citron', '[0.2, 1.0, 0.6, 1.0, 1.0, 1.0]'),
    ('Orange', '[0.7, 0.6, 0.7, 1.0, 1.0, 1.0]'),
    ('Banane', '[0.9, 0.1, 0.2, 1.0, 1.0, 0.0]'),
    ('Raisin', '[0.8, 0.3, 0.5, 1.0, 1.0, 1.0]'),
    ('Pastèque', '[0.8, 0.2, 0.6, 1.0, 1.0, 1.0]'),
    ('Melon', '[0.9, 0.2, 0.5, 1.0, 1.0, 1.0]'),
    ('Betterave', '[0.5, 0.1, 0.4, 0.0, 0.0, 1.0]'),
    ('Radis', '[0.2, 0.3, 0.9, 0.5, 0.0, 1.0]'),
    ('Brocoli', '[0.1, 0.1, 0.6, 0.0, 0.0, 0.0]'),
    ('Chou-fleur', '[0.1, 0.1, 0.7, 0.0, 0.0, 0.0]');
|
||||
|
||||
-- Add a 10-dimensional vector column holding each product's nutrition profile.
-- The columns read below (energie, proteines, ...) belong to the produit
-- table, not to the demo table produits, so produit is the table to alter.
alter table produit
    add column nutrition vector(10);

-- Fill the vector from the nutrition columns, in this fixed order:
--   energie, proteines, glucides, sucres, graisses,
--   graisses_saturees, sel, fibres, nutriscore, additifs
-- The array-to-vector cast rejects arrays that contain NULL, and several of
-- these columns are nullable, so only fully populated rows are updated;
-- the others keep nutrition = NULL instead of aborting the whole statement.
update produit
set nutrition = array[
        energie, proteines, glucides, sucres, graisses,
        graisses_saturees, sel, fibres, nutriscore, additifs
    ]::vector
where num_nulls(
        energie, proteines, glucides, sucres, graisses,
        graisses_saturees, sel, fibres, nutriscore, additifs
    ) = 0;
|
||||
|
||||
|
||||
-- Import into produit the rows of openfoodfacts_cleaned whose brand starts
-- with 'Familia' and whose id is not already present in produit.
-- Rows missing any of the core nutrition values (energy, proteins, fat,
-- saturated fat, carbohydrates, sugars, salt, nutriscore) are skipped.
with selection as (
    -- Each source column is aliased to the produit column it feeds, so the
    -- mapping is explicit rather than positional.
    select
        op.id                                  as id,
        op.product_name                        as nom,
        op.brands                              as marque,
        op."energy-kcal_100g"                  as energie,
        op.fat_100g                            as graisses,
        op."saturated-fat_100g"                as graisses_saturees,
        op.carbohydrates_100g                  as glucides,
        op.sugars_100g                         as sucres,
        op.proteins_100g                       as proteines,
        op.salt_100g                           as sel,
        op.fiber_100g                          as fibres,
        op.nutriscore_score                    as nutriscore,
        op.additives_n                         as additifs,
        string_to_array(op.additives_en, ',')  as additifs_list,
        op.potassium_100g                      as potassium,
        op.calcium_100g                        as calcium,
        op."vitamin-a_100g"                    as vitamin_a,
        op."vitamin-c_100g"                    as vitamin_c,
        op.main_category_en                    as categorie
    from openfoodfacts_cleaned op
    left join produit p on p.id = op.id
    where op.brands like 'Familia%'
      and p.id is null  -- anti-join: keep only products not yet imported
      and op."energy-kcal_100g" is not null
      and op.proteins_100g is not null
      and op.fat_100g is not null
      and op."saturated-fat_100g" is not null
      and op.carbohydrates_100g is not null
      and op.sugars_100g is not null
      and op.salt_100g is not null
      and op.nutriscore_score is not null
)
insert into produit (
    id, nom, marque,
    energie, graisses, graisses_saturees,
    glucides, sucres, proteines, sel,
    fibres, nutriscore, additifs, additifs_list,
    potassium, calcium, vitamin_a, vitamin_c, categorie
)
select
    id, nom, marque,
    energie, graisses, graisses_saturees,
    glucides, sucres, proteines, sel,
    fibres, nutriscore, additifs, additifs_list,
    potassium, calcium, vitamin_a, vitamin_c, categorie
from selection;
|
||||
Reference in New Issue
Block a user