"""Detect near-duplicate Markdown files and delete the later copy of each pair.

Origin of the script:
https://www.cyberforum.ru/python-graphics/thread2712141.html

Every ``*.md`` file located exactly one directory level below the base
directory is compared with every other one using
``difflib.SequenceMatcher``.  When the similarity of two files exceeds
``SIMILARITY_THRESHOLD`` percent, the file with the later ``getctime`` value
is scheduled for deletion.  A report is printed, prepended to ``log.md`` in
the base directory, and then the scheduled files are removed.
"""
import datetime
import os
from difflib import SequenceMatcher

try:
    from tqdm import tqdm
except ImportError:  # the progress bar is cosmetic; run without it
    def tqdm(iterable, *args, **kwargs):
        """Return *iterable* unchanged when tqdm is not installed."""
        return iterable

# Directory containing this script; its sub-directories are scanned.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))
# Name of the person being checked; it only appears in the report header.
who = "Савкин"
# Two files whose integer similarity percentage is above this are duplicates.
SIMILARITY_THRESHOLD = 70


def _collect_markdown_files(base_dir):
    """Return paths of ``.md`` files found in the direct sub-directories of *base_dir*."""
    found = []
    for entry in os.listdir(base_dir):
        sub_dir = os.path.join(base_dir, entry)
        if entry == "__pycache__" or not os.path.isdir(sub_dir):
            continue
        for file_name in os.listdir(sub_dir):
            file_path = os.path.join(sub_dir, file_name)
            is_markdown = os.path.splitext(file_name)[1] == '.md'
            if is_markdown and os.path.isfile(file_path):
                found.append(file_path)
    return found


def _read_text(path):
    """Return the text of *path*, rewriting cp1251 files as UTF-8 on disk.

    The file is first decoded as UTF-8.  Only when that decoding fails is it
    re-read as cp1251 and saved back in UTF-8, so later runs take the fast
    path.  Any other error (missing file, permissions, ...) propagates.
    """
    try:
        with open(path, encoding="utf-8") as handle:
            return handle.read()
    except UnicodeDecodeError:
        with open(path, encoding="cp1251") as handle:
            text = handle.read()
        with open(path, 'w', encoding="utf-8") as handle:
            handle.write(text)
        return text


def _prepend_to_log(log_path, text):
    """Write *text* in front of the current contents of *log_path*.

    A log file that does not exist yet is treated as empty.
    """
    try:
        with open(log_path, "r", encoding="utf-8") as f_log:
            previous = f_log.read()
    except FileNotFoundError:
        previous = ""
    with open(log_path, "w", encoding="utf-8") as f_log:
        f_log.write(text + previous)


def main(base_dir=BASE_DIR, checked_name=who):
    """Run the duplicate check on *base_dir* and delete the later duplicates.

    Args:
        base_dir: Directory whose direct sub-directories hold the ``.md``
            files to compare; ``log.md`` is written here as well.
        checked_name: Name printed in the report header.

    Returns:
        The list of absolute paths that were deleted, each listed once, in
        the order they were first detected.

    Raises:
        OSError: If a directory cannot be listed or a file cannot be read,
            written or removed.
        UnicodeDecodeError: If a file is neither valid UTF-8 nor cp1251.
    """
    print()
    files_paths = _collect_markdown_files(base_dir)

    now = datetime.datetime.now().strftime('%d-%m-%Y %H:%M')
    report = [f"Проверка: {checked_name}, время проверки: {now} \n"]
    print(report[0])

    # Read every file exactly once.  The timestamps are taken after reading
    # because the cp1251 -> UTF-8 rewrite may itself change them.
    lowered = {path: _read_text(path).lower() for path in files_paths}
    created = {path: os.path.getctime(path) for path in files_paths}

    # A dict is used as an insertion-ordered set: a file that loses against
    # several others must be removed only once.
    to_remove = {}
    for index, file_1 in enumerate(tqdm(files_paths)):
        # Only pairs (i, j) with i < j: each unordered pair is compared once.
        for file_2 in files_paths[index + 1:]:
            matcher = SequenceMatcher(None, lowered[file_1], lowered[file_2])
            ratio = int(matcher.ratio() * 100)
            if ratio <= SIMILARITY_THRESHOLD:
                continue
            # The file with the smaller ctime is the "earlier" one; on a tie
            # file_1 is treated as the later one.
            if created[file_1] < created[file_2]:
                early_path, later_path = file_1, file_2
            else:
                early_path, later_path = file_2, file_1
            # Paths relative to base_dir are for display only; deletion uses
            # the absolute path, so it works on every platform.
            later_file = os.path.relpath(later_path, base_dir)
            early_file = os.path.relpath(early_path, base_dir)
            report.append(f"{later_file} позже {early_file}\n")
            report.append(f"ratio = {ratio}, удален\n")
            to_remove[later_path] = None

    report.append("\n\n")
    out_str = "".join(report)
    print(out_str)

    # The newest report goes on top of the log.
    _prepend_to_log(os.path.join(base_dir, "log.md"), out_str)

    for remove_path in to_remove:
        os.remove(remove_path)
    print("success")
    return list(to_remove)


if __name__ == "__main__":
    main()