Skip to content

Instantly share code, notes, and snippets.

@DxDiagDx
Created November 7, 2022 07:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DxDiagDx/9b0841362670a6a7c7d7423600720a75 to your computer and use it in GitHub Desktop.
Save DxDiagDx/9b0841362670a6a7c7d7423600720a75 to your computer and use it in GitHub Desktop.
Python - services.py - Вспомогательные функции для парсинга сайтов
import os
import csv
import time
import json
import requests
# Default HTTP headers handed to requests by get_response; the user-agent
# string identifies the client as desktop Chrome on Windows.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/106.0.0.0 Safari/537.36'
    ),
}
def get_response(metod='get', attempts=3, pause=2, **kwargs):
    """Send an HTTP GET or POST request, retrying when the request fails.

    Args:
        metod: HTTP method name, 'get' or 'post' (case-insensitive). The
            parameter spelling is kept as-is so existing keyword callers
            keep working.
        attempts: Maximum number of tries.
        pause: Seconds to sleep after a try that raised an exception.
        **kwargs: Passed through to ``requests.get`` / ``requests.post``
            (for example ``url``, ``params``, ``stream``). A ``headers``
            mapping, if supplied, is merged over the module-level defaults.

    Returns:
        The last response received, or None if no try produced a response.

    Raises:
        ValueError: If ``metod`` is neither 'get' nor 'post'.
    """
    method = metod.lower()
    # Fail early with a clear message instead of dereferencing a None
    # response further down.
    if method not in ('get', 'post'):
        raise ValueError(f"Unsupported HTTP method: {metod!r}")
    send = requests.get if method == 'get' else requests.post

    # Caller-supplied headers override the defaults instead of colliding
    # with the explicit ``headers=`` keyword.
    request_headers = {**headers, **(kwargs.pop('headers', None) or {})}

    response = None
    for attempt in range(1, attempts + 1):
        try:
            response = send(headers=request_headers, **kwargs)
        except Exception as err:
            # Deliberately broad: this helper is best-effort and reports
            # failure by returning None rather than raising.
            print(err)
            if attempt < attempts:
                # No point in waiting when there is no retry left.
                time.sleep(pause)
            continue
        if response.status_code == 404:
            # A missing page will not appear on retry.
            print('Ошибка 404')
            break
        if response.status_code == 200:
            break
    return response
def get_extention(url):
    """Return the file extension of *url* without the leading dot.

    Only the last path segment is examined, so a query string, a fragment
    or dots in the host name do not leak into the result.

    Args:
        url: URL or plain file name.

    Returns:
        The text after the last dot of the final path segment, or an empty
        string when that segment contains no dot.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlsplit

    last_segment = urlsplit(url).path.rsplit('/', 1)[-1]
    if '.' not in last_segment:
        return ''
    return last_segment.rsplit('.', 1)[-1]
def download_file(url, folder, filename):
    """Download *url* into ``folder/filename`` unless that file already exists.

    The body is streamed to a temporary ``.part`` file and renamed into
    place only after the download completes, so an interrupted transfer
    does not leave a truncated file that later calls would treat as
    already downloaded.

    Args:
        url: Address of the file to fetch.
        folder: Target directory; created if missing.
        filename: Name of the file inside *folder*.

    Returns:
        None. Progress is reported with ``print``.
    """
    if folder:
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(folder, exist_ok=True)
    path_image = os.path.join(folder, filename)

    if os.path.exists(path_image):
        print(f"Изображние {filename} скачано ранее.")
        return

    response = get_response(metod='get', url=url, stream=True)
    if response is None:
        return

    partial_path = path_image + '.part'
    try:
        # A requests Response is falsy for error status codes; nothing is
        # written in that case.
        if not response:
            return
        with open(partial_path, 'wb') as file:
            for chunk in response.iter_content(1024):
                file.write(chunk)
        os.replace(partial_path, path_image)
        print(f"Изображние {filename} успешно скачано.")
    finally:
        # A streamed response holds its connection until explicitly closed.
        response.close()
        # Still present only if the download failed before the rename.
        if os.path.exists(partial_path):
            os.remove(partial_path)
def clear_text(text):
    """Collapse every run of whitespace in *text* into a single space.

    Leading and trailing whitespace is removed. Falsy input (empty string,
    None) is returned unchanged.
    """
    if not text:
        return text
    words = text.split()
    return ' '.join(words)
def create_csv(filename, fieldnames):
    """Create (or overwrite) a UTF-8 CSV file containing only the header row.

    Args:
        filename: Path of the CSV file to write.
        fieldnames: Column names written as the header.
    """
    with open(filename, mode='w', newline='', encoding='utf-8') as csv_file:
        header_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        header_writer.writeheader()
def write_csv(filename, fieldnames, data):
    """Append one row to a UTF-8 CSV file.

    Args:
        filename: Path of the CSV file; opened in append mode.
        fieldnames: Column names that fix the order of the written values.
        data: Mapping of column name to value for the row.
    """
    with open(filename, mode='a', newline='', encoding='utf-8') as csv_file:
        row_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        row_writer.writerow(data)
def open_csv(filename):
    """Read a UTF-8 CSV file with a header row into a list of row mappings.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one mapping per data row, keyed by the header names.
    """
    # newline='' lets the csv module do its own line-ending handling, so
    # line breaks embedded in quoted fields are returned unchanged.
    with open(filename, 'r', encoding='utf-8', newline='') as file:
        return list(csv.DictReader(file))
def open_html(filename):
    """Return the full text of the UTF-8 file at *filename*."""
    with open(filename, encoding='utf-8') as html_file:
        markup = html_file.read()
    return markup
def save_html(filename, html):
    """Write *html* to *filename* as UTF-8, replacing any existing file.

    Args:
        filename: Path of the file to write.
        html: Text to store.
    """
    with open(filename, mode='w', encoding='utf-8') as html_file:
        html_file.write(html)
def open_json(filename):
    """Parse the UTF-8 JSON file at *filename* and return the decoded data."""
    with open(filename, encoding='utf-8') as json_file:
        payload = json.load(json_file)
    return payload
def save_json(filename, data):
    """Serialize *data* as JSON into the UTF-8 file at *filename*.

    Non-ASCII characters are written as-is rather than escaped. A
    confirmation message is printed once the data has been written.

    Args:
        filename: Path of the file to write; an existing file is replaced.
        data: JSON-serializable object.
    """
    with open(filename, mode='w', encoding='utf-8') as json_file:
        json.dump(data, json_file, ensure_ascii=False)
    print('json сохранён')
def main():
    """Script entry point; intentionally does nothing."""
    return None


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment