Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import urllib2
from bs4 import BeautifulSoup
import json
import re
#fp = open('soup.txt')
#doc = fp.read()
# Scrape the BNP Paribas El Djazair branch locator page and print every
# branch as a JSON list of {"name": ..., "address": {"line_1", "city",
# "postcode"}} objects on stdout.

BRANCH_LOCATOR_URL = 'https://www.bnpparibas.dz/trouver-une-agence/'

# Captures the first run of digits so re.split() keeps it in the result.
_POSTCODE_RE = re.compile(r"([0-9]+)")


def _split_postcode_city(locality):
    """Split a '<postcode> <city>' string into a (postcode, city) tuple.

    The first run of digits is taken as the postcode and everything after
    it as the city. Splitting only once keeps any later digits as part of
    the city instead of silently truncating it. If the text contains no
    digits at all, the postcode is returned empty and the whole text is
    used as the city rather than raising IndexError.
    """
    parts = _POSTCODE_RE.split(locality, maxsplit=1)
    if len(parts) < 3:
        return '', locality.strip()
    return parts[1].strip(), parts[2].strip()


response = urllib2.urlopen(BRANCH_LOCATOR_URL)
try:
    doc = response.read()
finally:
    # Release the connection even if reading the body fails.
    response.close()

soup = BeautifulSoup(doc, 'html.parser')
branches = []
for elm in soup.find_all("li", class_="agency-orange"):
    # The branch name is the text of the first 'button3' link in the item.
    link = elm.find_all('a', class_='button3')[0]

    # The node reached by skipping one sibling of the link's parent and
    # stepping into the next one is the street-address text; two parse
    # steps further on is the "<postcode> <city>" text.
    # NOTE(review): this walk depends on the page's exact markup - confirm
    # against the live HTML if the output looks wrong.
    street_node = link.parent.next_sibling.next_sibling.next_element
    locality = street_node.next_element.next_element.replace('\n\t\t\t\t\t\t', '')
    postcode, city = _split_postcode_city(locality)

    branches.append({
        'name': link.text,
        'address': {
            'line_1': street_node.strip(),
            'city': city,
            'postcode': postcode
        }
    })

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(json.dumps(branches))
@chrisjsimpson

This comment has been minimized.

Copy link
Owner Author

@chrisjsimpson chrisjsimpson commented Feb 26, 2019

Outputs:

[{"name": "1er mai", "address": "90 Rue Hassiba Ben Bouali - 1er Mai  16000 Alger "}, {"name": "Agence DIGITALE Oran", "address": "7, Boulevard Larbi Tebessi 31200 Oran "}, {"name": "Agence DIGITALE Sidi Yahia", "address": "Boulevard Sidi Yahia,  16000 Alger "}, {"name": "A\u00efn Benian", "address": "103 rue Omar Idriss Fay\u00e7al 16260 Alger "}, {"name": "Ain Romana", "address": "41 Zone Industrielle Ben Boulaid Ain Romana 9000 Blida "}, {"name": "Ain Timouchent", "address": "13, cit\u00e9 57 logt Abou Bakr Sedik (ex Les Castors) 46000 Ain Timouchent "}, {"name": "A\u00efn Turk", "address": "2 Rue Mohamed Boudiaf 31000 Oran "}, {"name": "Akbou", "address": "RN 26 Faubourg de la gare 6001 Beja\u00efa "}, {"name": "Alger", "address": "Ilot 1, Quartier d'affaires d'Alger 16000 Alger "}, {"name": "Ali Khoudja", "address": "135 avenue Ali Khoudja 16606 Alger "}, {"name": "Annaba", "address": "57 boulevard Colonel Amirouche 23001 Annaba "}, {"name": "Annaba ALN", "address": "40 avenue de l'ALN - Champs de Mars 23001 Annaba "}, {"name": "Arzew", "address": "3 avenue des jardins 31200 Oran "}, {"name": "Bab Essebt", "address": "2 Boulevard Larbi Tebessi 9000 Blida "}, {"name": "Bab Ezzouar", "address": "Centre Commercial et des Loisirs 16000 Alger "}, {"name": "Baba Hassen", "address": "Cit\u00e9 Al Oumrane n\u00b03 16303 Alger "}, {"name": "Beaulieu", "address": "99 route de Meftah 16310 Alger "}, {"name": "B\u00e9jaia", "address": "Cit\u00e9 Somacob Immeuble G - 18 rue Boumedaoui Nacer 6000 Beja\u00efa "}, {"name": "B\u00e9ja\u00efa ALN", "address": "5 boulevard de l'ALN - Daouadji 6000 Beja\u00efa "}, {"name": "Ben Aknoun", "address": "Z\u00e9phir Center - 11 rue Doudou Mokhtar 16306 Alger "}, {"name": "biskra", "address": "24, boulevard El Amir Abdelkader  07000 Biskra "}, {"name": "Blida", "address": "Complexe multifonctionnel Sim boulevard Mohamed Boudiaf 9000 Blida "}, {"name": "Bordj Bou Arreridj", "address": "31 boulevard Houari Boumediene, 34000 Bordj Bou 
Arreridj "}, {"name": "Bordj El Kiffan", "address": "3 rue Saidi Ahmed 16120 Alger "}, {"name": "Boufarik", "address": "54 boulevard Mokhtari 9400 Blida "}, {"name": "Bouira", "address": "72 Boulevard Zighout Youcef,  10000 Bouira "}, {"name": "Boumerdes", "address": "R\u00e9sidence Badi RN 24 35000 Boumerdes "}, {"name": "Boumerdes R\u00e9sidence Zidane", "address": "Cit\u00e9 Ain Abdellah 35000 Boumerdes "}, {"name": "Ch\u00e9raga", "address": "Lot N\u00b01 Amara 2 route de Ouled Fayet 16086 Alger "}, {"name": "Chlef", "address": "26 rue de la R\u00e9sistance 2000 Chlef "}, {"name": "Cirta", "address": "rue de Cirta 16035 Hydra "}, {"name": "Constantine AADL", "address": "Bd de l'ALN B\u00e2t TH03 local C012 nouvelle ville Ali Mendjeli-El Kheroub 25000 Constantine "}, {"name": "Constantine Belle Vue", "address": "28 rue Zadi Abdelhamid - Belle Vue 25000 Constantine "}, {"name": "Constantine Cirta", "address": "10 rue Aouati Mustapha 25000 Constantine "}, {"name": "Dar El Beida", "address": "Cit\u00e9 Aissat Idir 16033 Alger "}, {"name": "Dely Ibrahim Bois des Cars", "address": "10 Lots Bois des Cars II 16320 Alger "}, {"name": "Dely Ibrahim Cit\u00e9 Olympique", "address": "Cit\u00e9 Olympique 16320 Alger "}, {"name": "Didouche Mourad", "address": "9A rue Didouche Mourad 16000 Alger "}, {"name": "Draria", "address": "1 chemin des Cr\u00eates 16003 Alger "}, {"name": "El Achour", "address": "53 lot d'El Achour 16403 Alger "}, {"name": "El Mouradia", "address": "10 rue des Fr\u00e8res Oughilis 16070 Alger "}, {"name": "Hassi Messaoud", "address": "Cit\u00e9 1850 logements,  30500 Hassi Messaoud "}, {"name": "Hydra Djamila", "address": "8 Rue de Cirta 16035 Hydra "}, {"name": "Jijel", "address": "26, Rue du 20 Ao\u00fbt 1955,  18000 Jijel "}, {"name": "Kol\u00e9a", "address": "Route de Fouka - B\u00e2t A9 bis 42400 Tipaza "}, {"name": "Kouba", "address": "Local Nouvion La Croix 16050 Alger "}, {"name": "La Vigerie", "address": "48 boulevard Colonel Bougara - Belle 
Vue 16200 Alger "}, {"name": "Les Sources", "address": "Lotissement n\u00b03 - Villa n\u00b03 16000 Alger "}, {"name": "Mascara", "address": "Rue Bougara Tahar Bel-Air 29000 Mascara "}, {"name": "M\u00e9d\u00e9a", "address": "12 boulevard de l'ALN 26000 M\u00e9d\u00e9a "}, {"name": "Mostaganem", "address": "Cour de la Culture - Route d'Oran 27000 Mostaganem "}, {"name": "MSILA", "address": "Cit\u00e9 Administrative M'Sila 28000 MSILA "}, {"name": "Oran Bir El Djir", "address": "84 Lotissement, 110 Hai Emir  31001 Oran "}, {"name": "Oran Chakib Arslan", "address": "34, avenue Chakib Arslane 31001 Oran "}, {"name": "Oran Millenium", "address": "A1 Tranche 01 Boulevard du Millenium 31000 Oran "}, {"name": "Oran Route de Canastel", "address": "Route Canastel ILOT 13  31001 Oran "}, {"name": "Oran St Hubert", "address": "4 boulevard de l'ANP,  31001 Oran "}, {"name": "Oran Usto", "address": "11 Coop\u00e9rative Zighout Youcef, 31001 Oran "}, {"name": "Rostomia (Ex Bouzareah)", "address": "N\u00b011 rue 1er Novembre,  16340 Alger "}, {"name": "Rouiba", "address": "Cit\u00e9 Cadat N\u00b0 90 16012 Alger "}, {"name": "S\u00e9tif", "address": "Coop\u00e9rative 1er novembre 1954 n\u00b08 19000 S\u00e9tif "}, {"name": "S\u00e9tif \u2013 1er Novembre 1954", "address": "3 avenue du 1er Novembre 1954 19000 S\u00e9tif "}, {"name": "S\u00e9tif Park Mall", "address": "Rue du 08 novembre,  19000 S\u00e9tif "}, {"name": "Sidi Bel Abbes", "address": "2 boulevard Larbi Tbessi 22005 Sidi Bel Abbes "}, {"name": "Sidi Yahia", "address": "4 Chemin Sidi Yahia 16300 Alger "}, {"name": "Si\u00e8ge Social", "address": "Ilot 1, Quartier d'affaires d'Alger 16000 Alger "}, {"name": "Skikda", "address": "10 Cit\u00e9 Amar Chetaibi 21000 Skikda "}, {"name": "Staoueli", "address": "64 rue Kaiti Ahmed 16062 Alger "}, {"name": "Telemly", "address": "82 boulevard Krim Belkacem 16000 Alger "}, {"name": "Tiaret", "address": "Cit\u00e9 du 1er Novembre 14000 Tiaret "}, {"name": "Tipaza", "address": "Route 
Nationale - Art\u00e8re principale 42000 Tipaza "}, {"name": "Tizi Ouzou", "address": "Rue Stiti Ali 15000 Tizi Ouzou "}, {"name": "Tlemcen", "address": "3 boulevard Abderrahmane Derrar 13000 Tlemcen "}]
@chrisjsimpson

This comment has been minimized.

Copy link
Owner Author

@chrisjsimpson chrisjsimpson commented Feb 27, 2019

Outputs:

https://jsoneditoronline.org/?id=43492f1f30a34788a347bcedcd934275

[{"name": "1er mai", "address": {"line_1": "90 Rue Hassiba Ben Bouali - 1er Mai", "city": "Alger", "postcode": "16000"}}, {"name": "Agence DIGITALE Oran", "address": {"line_1": "7, Boulevard Larbi Tebessi", "city": "Oran", "postcode": "31200"}}, {"name": "Agence DIGITALE Sidi Yahia", "address": {"line_1": "Boulevard Sidi Yahia,", "city": "Alger", "postcode": "16000"}}, {"name": "A\u00efn Benian", "address": {"line_1": "103 rue Omar Idriss Fay\u00e7al", "city": "Alger", "postcode": "16260"}}, {"name": "Ain Romana", "address": {"line_1": "41 Zone Industrielle Ben Boulaid Ain Romana", "city": "Blida", "postcode": "9000"}}, {"name": "Ain Timouchent", "address": {"line_1": "13, cit\u00e9 57 logt Abou Bakr Sedik (ex Les Castors)", "city": "Ain Timouchent", "postcode": "46000"}}, {"name": "A\u00efn Turk", "address": {"line_1": "2 Rue Mohamed Boudiaf", "city": "Oran", "postcode": "31000"}}, {"name": "Akbou", "address": {"line_1": "RN 26 Faubourg de la gare", "city": "Beja\u00efa", "postcode": "6001"}}, {"name": "Alger", "address": {"line_1": "Ilot 1, Quartier d'affaires d'Alger", "city": "Alger", "postcode": "16000"}}, {"name": "Ali Khoudja", "address": {"line_1": "135 avenue Ali Khoudja", "city": "Alger", "postcode": "16606"}}, {"name": "Annaba", "address": {"line_1": "57 boulevard Colonel Amirouche", "city": "Annaba", "postcode": "23001"}}, {"name": "Annaba ALN", "address": {"line_1": "40 avenue de l'ALN - Champs de Mars", "city": "Annaba", "postcode": "23001"}}, {"name": "Arzew", "address": {"line_1": "3 avenue des jardins", "city": "Oran", "postcode": "31200"}}, {"name": "Bab Essebt", "address": {"line_1": "2 Boulevard Larbi Tebessi", "city": "Blida", "postcode": "9000"}}, {"name": "Bab Ezzouar", "address": {"line_1": "Centre Commercial et des Loisirs", "city": "Alger", "postcode": "16000"}}, {"name": "Baba Hassen", "address": {"line_1": "Cit\u00e9 Al Oumrane n\u00b03", "city": "Alger", "postcode": "16303"}}, {"name": "Beaulieu", "address": {"line_1": "99 route de 
Meftah", "city": "Alger", "postcode": "16310"}}, {"name": "B\u00e9jaia", "address": {"line_1": "Cit\u00e9 Somacob Immeuble G - 18 rue Boumedaoui Nacer", "city": "Beja\u00efa", "postcode": "6000"}}, {"name": "B\u00e9ja\u00efa ALN", "address": {"line_1": "5 boulevard de l'ALN - Daouadji", "city": "Beja\u00efa", "postcode": "6000"}}, {"name": "Ben Aknoun", "address": {"line_1": "Z\u00e9phir Center - 11 rue Doudou Mokhtar", "city": "Alger", "postcode": "16306"}}, {"name": "biskra", "address": {"line_1": "24, boulevard El Amir Abdelkader", "city": "Biskra", "postcode": "07000"}}, {"name": "Blida", "address": {"line_1": "Complexe multifonctionnel Sim boulevard Mohamed Boudiaf", "city": "Blida", "postcode": "9000"}}, {"name": "Bordj Bou Arreridj", "address": {"line_1": "31 boulevard Houari Boumediene,", "city": "Bordj Bou Arreridj", "postcode": "34000"}}, {"name": "Bordj El Kiffan", "address": {"line_1": "3 rue Saidi Ahmed", "city": "Alger", "postcode": "16120"}}, {"name": "Boufarik", "address": {"line_1": "54 boulevard Mokhtari", "city": "Blida", "postcode": "9400"}}, {"name": "Bouira", "address": {"line_1": "72 Boulevard Zighout Youcef,", "city": "Bouira", "postcode": "10000"}}, {"name": "Boumerdes", "address": {"line_1": "R\u00e9sidence Badi RN 24", "city": "Boumerdes", "postcode": "35000"}}, {"name": "Boumerdes R\u00e9sidence Zidane", "address": {"line_1": "Cit\u00e9 Ain Abdellah", "city": "Boumerdes", "postcode": "35000"}}, {"name": "Ch\u00e9raga", "address": {"line_1": "Lot N\u00b01 Amara 2 route de Ouled Fayet", "city": "Alger", "postcode": "16086"}}, {"name": "Chlef", "address": {"line_1": "26 rue de la R\u00e9sistance", "city": "Chlef", "postcode": "2000"}}, {"name": "Cirta", "address": {"line_1": "rue de Cirta", "city": "Hydra", "postcode": "16035"}}, {"name": "Constantine AADL", "address": {"line_1": "Bd de l'ALN B\u00e2t TH03 local C012 nouvelle ville Ali Mendjeli-El Kheroub", "city": "Constantine", "postcode": "25000"}}, {"name": "Constantine Belle Vue", 
"address": {"line_1": "28 rue Zadi Abdelhamid - Belle Vue", "city": "Constantine", "postcode": "25000"}}, {"name": "Constantine Cirta", "address": {"line_1": "10 rue Aouati Mustapha", "city": "Constantine", "postcode": "25000"}}, {"name": "Dar El Beida", "address": {"line_1": "Cit\u00e9 Aissat Idir", "city": "Alger", "postcode": "16033"}}, {"name": "Dely Ibrahim Bois des Cars", "address": {"line_1": "10 Lots Bois des Cars II", "city": "Alger", "postcode": "16320"}}, {"name": "Dely Ibrahim Cit\u00e9 Olympique", "address": {"line_1": "Cit\u00e9 Olympique", "city": "Alger", "postcode": "16320"}}, {"name": "Didouche Mourad", "address": {"line_1": "9A rue Didouche Mourad", "city": "Alger", "postcode": "16000"}}, {"name": "Draria", "address": {"line_1": "1 chemin des Cr\u00eates", "city": "Alger", "postcode": "16003"}}, {"name": "El Achour", "address": {"line_1": "53 lot d'El Achour", "city": "Alger", "postcode": "16403"}}, {"name": "El Mouradia", "address": {"line_1": "10 rue des Fr\u00e8res Oughilis", "city": "Alger", "postcode": "16070"}}, {"name": "Hassi Messaoud", "address": {"line_1": "Cit\u00e9 1850 logements,", "city": "Hassi Messaoud", "postcode": "30500"}}, {"name": "Hydra Djamila", "address": {"line_1": "8 Rue de Cirta", "city": "Hydra", "postcode": "16035"}}, {"name": "Jijel", "address": {"line_1": "26, Rue du 20 Ao\u00fbt 1955,", "city": "Jijel", "postcode": "18000"}}, {"name": "Kol\u00e9a", "address": {"line_1": "Route de Fouka - B\u00e2t A9 bis", "city": "Tipaza", "postcode": "42400"}}, {"name": "Kouba", "address": {"line_1": "Local Nouvion La Croix", "city": "Alger", "postcode": "16050"}}, {"name": "La Vigerie", "address": {"line_1": "48 boulevard Colonel Bougara - Belle Vue", "city": "Alger", "postcode": "16200"}}, {"name": "Les Sources", "address": {"line_1": "Lotissement n\u00b03 - Villa n\u00b03", "city": "Alger", "postcode": "16000"}}, {"name": "Mascara", "address": {"line_1": "Rue Bougara Tahar Bel-Air", "city": "Mascara", "postcode": "29000"}}, 
{"name": "M\u00e9d\u00e9a", "address": {"line_1": "12 boulevard de l'ALN", "city": "M\u00e9d\u00e9a", "postcode": "26000"}}, {"name": "Mostaganem", "address": {"line_1": "Cour de la Culture - Route d'Oran", "city": "Mostaganem", "postcode": "27000"}}, {"name": "MSILA", "address": {"line_1": "Cit\u00e9 Administrative M'Sila", "city": "MSILA", "postcode": "28000"}}, {"name": "Oran Bir El Djir", "address": {"line_1": "84 Lotissement, 110 Hai Emir", "city": "Oran", "postcode": "31001"}}, {"name": "Oran Chakib Arslan", "address": {"line_1": "34, avenue Chakib Arslane", "city": "Oran", "postcode": "31001"}}, {"name": "Oran Millenium", "address": {"line_1": "A1 Tranche 01 Boulevard du Millenium", "city": "Oran", "postcode": "31000"}}, {"name": "Oran Route de Canastel", "address": {"line_1": "Route Canastel ILOT 13", "city": "Oran", "postcode": "31001"}}, {"name": "Oran St Hubert", "address": {"line_1": "4 boulevard de l'ANP,", "city": "Oran", "postcode": "31001"}}, {"name": "Oran Usto", "address": {"line_1": "11 Coop\u00e9rative Zighout Youcef,", "city": "Oran", "postcode": "31001"}}, {"name": "Rostomia (Ex Bouzareah)", "address": {"line_1": "N\u00b011 rue 1er Novembre,", "city": "Alger", "postcode": "16340"}}, {"name": "Rouiba", "address": {"line_1": "Cit\u00e9 Cadat N\u00b0 90", "city": "Alger", "postcode": "16012"}}, {"name": "S\u00e9tif", "address": {"line_1": "Coop\u00e9rative 1er novembre 1954 n\u00b08", "city": "S\u00e9tif", "postcode": "19000"}}, {"name": "S\u00e9tif \u2013 1er Novembre 1954", "address": {"line_1": "3 avenue du 1er Novembre 1954", "city": "S\u00e9tif", "postcode": "19000"}}, {"name": "S\u00e9tif Park Mall", "address": {"line_1": "Rue du 08 novembre,", "city": "S\u00e9tif", "postcode": "19000"}}, {"name": "Sidi Bel Abbes", "address": {"line_1": "2 boulevard Larbi Tbessi", "city": "Sidi Bel Abbes", "postcode": "22005"}}, {"name": "Sidi Yahia", "address": {"line_1": "4 Chemin Sidi Yahia", "city": "Alger", "postcode": "16300"}}, {"name": "Si\u00e8ge 
Social", "address": {"line_1": "Ilot 1, Quartier d'affaires d'Alger", "city": "Alger", "postcode": "16000"}}, {"name": "Skikda", "address": {"line_1": "10 Cit\u00e9 Amar Chetaibi", "city": "Skikda", "postcode": "21000"}}, {"name": "Staoueli", "address": {"line_1": "64 rue Kaiti Ahmed", "city": "Alger", "postcode": "16062"}}, {"name": "Telemly", "address": {"line_1": "82 boulevard Krim Belkacem", "city": "Alger", "postcode": "16000"}}, {"name": "Tiaret", "address": {"line_1": "Cit\u00e9 du 1er Novembre", "city": "Tiaret", "postcode": "14000"}}, {"name": "Tipaza", "address": {"line_1": "Route Nationale - Art\u00e8re principale", "city": "Tipaza", "postcode": "42000"}}, {"name": "Tizi Ouzou", "address": {"line_1": "Rue Stiti Ali", "city": "Tizi Ouzou", "postcode": "15000"}}, {"name": "Tlemcen", "address": {"line_1": "3 boulevard Abderrahmane Derrar", "city": "Tlemcen", "postcode": "13000"}}]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment