Last active
April 14, 2022 17:54
-
-
Save CharlesNepote/2ced9c2c143b3e81e3032e64bb449aa7 to your computer and use it in GitHub Desktop.
Open Food Facts import
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
code | |
creator | |
created_t | |
last_modified_t | |
product_name | |
abbreviated_product_name | |
generic_name | |
quantity | |
packaging | |
packaging_text | |
brands | |
categories | |
origins | |
manufacturing_places | |
labels | |
emb_codes | |
cities | |
purchase_places | |
stores | |
countries | |
ingredients_text | |
ingredients_tags | |
allergens | |
traces | |
serving_size | |
serving_quantity | |
no_nutriments | |
additives_n | |
additives | |
nutriscore_score | |
nutriscore_grade | |
nova_group | |
pnns_groups_1 | |
pnns_groups_2 | |
food_groups | |
states | |
brand_owner | |
ecoscore_score | |
ecoscore_grade |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download and decompress the Open Food Facts MongoDB dump
wget https://static.openfoodfacts.org/data/openfoodfacts-mongodbdump.tar.gz
tar -xzf openfoodfacts-mongodbdump.tar.gz
# Restore the whole database. mongorestore recreates indexes recorded by mongodump.
# --drop removes each existing collection before restoring it from ./dump.
mongorestore --drop ./dump
# => 2254885 document(s) restored successfully. 0 document(s) failed to restore.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Display the first 5 products in JSON format, using pagination
# https://www.codementor.io/@arpitbhayani/fast-and-efficient-pagination-in-mongodb-9095flbqr
mongo off --eval 'db.products.find().limit(5).pretty().shellPrint()' --quiet
# Combined with jq (JSON tool) to provide colors
# jq has to be installed separately. See https://stedolan.github.io/jq/
mongo off --eval 'db.products.find().limit(5).pretty().shellPrint()' --quiet | jq .
# Combined with jq to provide colors and compact output (each JSON object on a single line, aka JSONL format)
mongo off --eval 'db.products.find().limit(5).pretty().shellPrint()' --quiet | jq -c .
# Get products from Germany; return fields "code" and "countries_tags"; limit to 2 products
mongo off --eval 'db.products.find({countries_tags: "en:germany"}, {code: 1, countries_tags: 1}).limit(2).pretty().shellPrint()' --quiet
# Get the data from one field without _id
mongo off --eval 'db.products.find({countries_tags: "en:germany"}, {_id: 0, countries_tags: 1}).limit(2).pretty().shellPrint()' --quiet
# Exports
# See: https://www.mongodb.com/docs/database-tools/mongoexport/
# 1. The "aggregate" way: write the matching products to a "result" collection, then export that collection
mongo off --eval 'db.products.aggregate([{$match: {product_name: "Coke"}},{$out: "result"}])'
mongoexport --db off --collection result --fields code,product_name --type=csv --out result.csv
# 2. The -q/--query option way
# Export the first 5 German products (the query must be a valid JSON document)
mongoexport -d off -c products --type=csv --fields code,countries_tags -q '{"countries_tags": "en:germany"}' --out report.csv --limit 5
# Export to STDOUT in CSV format (no --out given); notice option --quiet, which limits log output
mongoexport -d off -c products --type=csv --fields code,countries_tags -q '{"countries_tags": "en:germany"}' --limit 5 --quiet
# How long does it take to export all German products?
time mongoexport -d off -c products --type=csv --fields code,countries_tags -q '{"countries_tags": "en:germany"}' --out report.csv
# real 0m10.135s
# Specify the fields in a file containing the line-separated list of fields to export (--fieldFile option)
# Official CSV export fields come from the @export_fields variable in /lib/ProductOpener/Config_off.pm
mongoexport -d off -c products --type=csv --fieldFile official_csv_export_fields.txt -q '{"countries_tags": "en:germany"}' --limit 5 --quiet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mongo # launch mongo shell
# Some basic commands (to be typed inside the mongo shell, not in bash)
# show databases # show all databases
# use off # to work on a particular database
# db.products.find()
# db.products.find().pretty()
# db.products.find({product_name:"Coke"}).pretty()
# db.products.aggregate([
#   {$match: {product_name: 'Coke'}},
#   {$out: 'result'}
# ])
# Then, from the bash shell, export the "result" collection written by $out
mongoexport --db off --collection result --fields code,product_name --type=csv --out result.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
code | |
url | |
creator | |
created_t | |
created_datetime | |
last_modified_t | |
last_modified_datetime | |
product_name | |
abbreviated_product_name | |
generic_name | |
quantity | |
packaging | |
packaging_tags | |
packaging_text | |
brands | |
brands_tags | |
categories | |
categories_tags | |
categories_en | |
origins | |
origins_tags | |
origins_en | |
manufacturing_places | |
manufacturing_places_tags | |
labels | |
labels_tags | |
labels_en | |
emb_codes | |
emb_codes_tags | |
first_packaging_code_geo | |
cities | |
cities_tags | |
purchase_places | |
stores | |
countries | |
countries_tags | |
countries_en | |
ingredients_text | |
allergens | |
allergens_en | |
traces | |
traces_tags | |
traces_en | |
serving_size | |
serving_quantity | |
no_nutriments | |
additives_n | |
additives | |
additives_tags | |
additives_en | |
ingredients_from_palm_oil_n | |
ingredients_from_palm_oil | |
ingredients_from_palm_oil_tags | |
ingredients_that_may_be_from_palm_oil_n | |
ingredients_that_may_be_from_palm_oil | |
ingredients_that_may_be_from_palm_oil_tags | |
nutriscore_score | |
nutriscore_grade | |
nova_group | |
pnns_groups_1 | |
pnns_groups_2 | |
food_groups | |
food_groups_tags | |
food_groups_en | |
states | |
states_tags | |
states_en | |
brand_owner | |
ecoscore_score_fr | |
ecoscore_grade_fr | |
main_category | |
main_category_en | |
image_url | |
image_small_url | |
image_ingredients_url | |
image_ingredients_small_url | |
image_nutrition_url | |
image_nutrition_small_url | |
energy-kj_100g | |
energy-kcal_100g | |
energy_100g | |
energy-from-fat_100g | |
fat_100g | |
saturated-fat_100g | |
-butyric-acid_100g | |
-caproic-acid_100g | |
-caprylic-acid_100g | |
-capric-acid_100g | |
-lauric-acid_100g | |
-myristic-acid_100g | |
-palmitic-acid_100g | |
-stearic-acid_100g | |
-arachidic-acid_100g | |
-behenic-acid_100g | |
-lignoceric-acid_100g | |
-cerotic-acid_100g | |
-montanic-acid_100g | |
-melissic-acid_100g | |
monounsaturated-fat_100g | |
polyunsaturated-fat_100g | |
omega-3-fat_100g | |
-alpha-linolenic-acid_100g | |
-eicosapentaenoic-acid_100g | |
-docosahexaenoic-acid_100g | |
omega-6-fat_100g | |
-linoleic-acid_100g | |
-arachidonic-acid_100g | |
-gamma-linolenic-acid_100g | |
-dihomo-gamma-linolenic-acid_100g | |
omega-9-fat_100g | |
-oleic-acid_100g | |
-elaidic-acid_100g | |
-gondoic-acid_100g | |
-mead-acid_100g | |
-erucic-acid_100g | |
-nervonic-acid_100g | |
trans-fat_100g | |
cholesterol_100g | |
carbohydrates_100g | |
sugars_100g | |
-sucrose_100g | |
-glucose_100g | |
-fructose_100g | |
-lactose_100g | |
-maltose_100g | |
-maltodextrins_100g | |
starch_100g | |
polyols_100g | |
fiber_100g | |
soluble-fiber_100g | |
insoluble-fiber_100g | |
proteins_100g | |
casein_100g | |
serum-proteins_100g | |
nucleotides_100g | |
salt_100g | |
sodium_100g | |
alcohol_100g | |
vitamin-a_100g | |
beta-carotene_100g | |
vitamin-d_100g | |
vitamin-e_100g | |
vitamin-k_100g | |
vitamin-c_100g | |
vitamin-b1_100g | |
vitamin-b2_100g | |
vitamin-pp_100g | |
vitamin-b6_100g | |
vitamin-b9_100g | |
folates_100g | |
vitamin-b12_100g | |
biotin_100g | |
pantothenic-acid_100g | |
silica_100g | |
bicarbonate_100g | |
potassium_100g | |
chloride_100g | |
calcium_100g | |
phosphorus_100g | |
iron_100g | |
magnesium_100g | |
zinc_100g | |
copper_100g | |
manganese_100g | |
fluoride_100g | |
selenium_100g | |
chromium_100g | |
molybdenum_100g | |
iodine_100g | |
caffeine_100g | |
taurine_100g | |
ph_100g | |
fruits-vegetables-nuts_100g | |
fruits-vegetables-nuts-dried_100g | |
fruits-vegetables-nuts-estimate_100g | |
fruits-vegetables-nuts-estimate-from-ingredients_100g | |
collagen-meat-protein-ratio_100g | |
cocoa_100g | |
chlorophyl_100g | |
carbon-footprint_100g | |
carbon-footprint-from-meat-or-fish_100g | |
nutrition-score-fr_100g | |
nutrition-score-uk_100g | |
glycemic-index_100g | |
water-hardness_100g | |
choline_100g | |
phylloquinone_100g | |
beta-glucan_100g | |
inositol_100g | |
carnitine_100g |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment