This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
# Calea către folderul care conține imaginile | |
folder_path = r'e:\Carte\BB\17 - Site Leadership\Principal 2022\ro\images' | |
# Obținem o listă cu numele fișierelor webp din folder, fără extensie | |
webp_files = [os.path.splitext(filename)[0] for filename in os.listdir(folder_path) if filename.endswith('.webp')] | |
# Iterăm prin fișierele din folder și ștergem fișierele jpg care au același nume cu fișierele webp | |
for filename in os.listdir(folder_path): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
# Calea către folderul care conține imaginile | |
folder_path = r'e:\Carte\BB\17 - Site Leadership\Principal 2022\ro\images' | |
# Set pentru a stoca numele fișierelor .webp fără extensie | |
webp_files = set() | |
# Colectăm numele fișierelor .webp | |
for filename in os.listdir(folder_path): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import PyPDF2 | |
from pdfminer.high_level import extract_text | |
import fitz # PyMuPDF | |
from PIL import Image | |
def print_step(step) -> None:
    """Print a banner line announcing a processing step.

    The banner is preceded by a blank line and consists of the step label
    surrounded by a single space and 20 ``=`` characters on each side.

    Args:
        step: Label of the step; formatted with ``str()`` semantics via the
            f-string, so any printable object is accepted.
    """
    # Same separator on both sides of the label; built once for readability.
    bar = '=' * 20
    print(f"\n{bar} {step} {bar}")
def repair_with_pypdf2(input_path, output_path): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```
https://archive.org Internet Archive Upload PDF ERROR MESSAGE
There is a network problem
Resume Uploading
400 Bad Data (details)
<Error><Code>BadContent</Code><Message>Uploaded content is unacceptable.</Message><Resource>Syntax error detected in pdf data. You may be able to repair the pdf file with a repair tool, pdftk is one such tool.</Resource><RequestId>440a5d56-535c-4a53-9453-a6713c02b289</RequestId></Error>
```
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import PyPDF2 | |
import requests | |
import json | |
# Funcție pentru a extrage textul din PDF | |
def extract_text_from_pdf(pdf_path): | |
try: | |
with open(pdf_path, 'rb') as file: | |
reader = PyPDF2.PdfReader(file) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import PyPDF2 | |
import pikepdf | |
import requests | |
import fitz # PyMuPDF | |
def check_and_repair_pdf(file_path): | |
print(f"\nAnalizăm fișierul: {file_path}") | |
# Verifică dimensiunea fișierului |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
root_directory = 'j:\\PDF c\\2\\' | |
for folder_name, subfolders, files in os.walk(root_directory): | |
for file_name in files: | |
if file_name.endswith('.pdf'): | |
old_path = os.path.join(folder_name, file_name) | |
new_name = f"{os.path.basename(folder_name)} - {file_name}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.webdriver.chrome.service import Service | |
from selenium.webdriver.chrome.options import Options | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.common.exceptions import TimeoutException | |
from webdriver_manager.chrome import ChromeDriverManager | |
from urllib.parse import urljoin, unquote | |
import os |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.webdriver.chrome.service import Service | |
from selenium.webdriver.chrome.options import Options | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.common.exceptions import TimeoutException | |
from webdriver_manager.chrome import ChromeDriverManager | |
from urllib.parse import urljoin, unquote | |
import os |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.webdriver.chrome.service import Service | |
from selenium.webdriver.chrome.options import Options | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.common.exceptions import TimeoutException | |
from webdriver_manager.chrome import ChromeDriverManager | |
from urllib.parse import urljoin, unquote | |
import os |
NewerOlder