|
@@ -43,11 +43,14 @@ def preprocess_text(text):
|
|
|
# директория файла
|
|
|
BASE_DIR = os.path.abspath(os.path.dirname(__file__))
|
|
|
|
|
|
-
|
|
|
# ссылка для проверки
|
|
|
url = "http://213.155.192.79:3001/ypv/up/raw/master/%d0%ad%d0%90%d0%a1%d0%b2%d0%97%d0%98/%d0%9b%d0%b5%d0%ba%d1%86%d0%b8%d0%b8/1.3.300_%d0%9a%d1%80%d0%b8%d1%82%d0%b5%d1%80%d0%b8%d0%b8_%d0%ba%d0%bb%d0%b0%d1%81%d1%81%d0%b8%d1%84%d0%b8%d0%ba%d0%b0%d1%86%d0%b8%d0%b8_%d1%83%d0%b3%d1%80%d0%be%d0%b7/Doc.md"
|
|
|
+who = "Савкин С."
|
|
|
+
|
|
|
|
|
|
-print()
|
|
|
+
|
|
|
+now = datetime.datetime.now().strftime('%d-%m-%Y %H:%M')
|
|
|
+out_str = f"Проверка: {who}, время проверки: {now} \n"
|
|
|
|
|
|
response = requests.get(url)
|
|
|
post_html = response.text
|
|
@@ -57,7 +60,7 @@ post_list = post_html.split("\n")
|
|
|
# проверяем правильность оформления 1й строки
|
|
|
line_1 = post_list[0]
|
|
|
if (line_1[0]) != "#":
|
|
|
- print("Заголовок статьи не найден")
|
|
|
+ out_str += "Заголовок статьи не найден\n"
|
|
|
|
|
|
header_text = line_1.replace("# ", "")
|
|
|
header_text = header_text.replace(".", "")
|
|
@@ -82,12 +85,11 @@ for readme_str in readme_list:
|
|
|
name_str = name_str.replace(".", "")
|
|
|
|
|
|
if (str(name_str) == str(header_text)):
|
|
|
- print("Лекция найдена")
|
|
|
+ out_str += "Лекция найдена\n"
|
|
|
lection_exist = True
|
|
|
|
|
|
post_tokens, post_uniq_text = preprocess_text(post_html)
|
|
|
- print(f"количество уникальных слов: {len(set(post_tokens))}")
|
|
|
- print()
|
|
|
+ out_str += f"количество уникальных слов: {len(set(post_tokens))}\n\n"
|
|
|
|
|
|
|
|
|
|
|
@@ -100,7 +102,7 @@ for readme_str in readme_list:
|
|
|
if lection_exist and (str(name_str) != str(header_text)):
|
|
|
variant_name, t = readme_str.split("]")
|
|
|
variant_name = variant_name.strip("[")
|
|
|
- print(f"проверяю {variant_name}")
|
|
|
+ out_str += f"проверяю {variant_name}\n"
|
|
|
t, variant_uri = readme_str.split("(")
|
|
|
variant_uri = variant_uri.replace("),", "")
|
|
|
variant_uri = variant_uri.strip()
|
|
@@ -114,60 +116,27 @@ for readme_str in readme_list:
|
|
|
variant_html = f.read()
|
|
|
|
|
|
variant_tokens, variant_uniq_text = preprocess_text(variant_html)
|
|
|
- print(f"количество уникальных слов варианта: {len(set(variant_tokens))}")
|
|
|
+ out_str += f"количество уникальных слов варианта: {len(set(variant_tokens))}\n"
|
|
|
|
|
|
# пересечение множеств
|
|
|
c = list(set(post_tokens) & set(variant_tokens))
|
|
|
ratio = 1 - (len(c) / len(set(post_tokens)))
|
|
|
- print(f"количество совпадающих слов: {len(c)} / {ratio}%")
|
|
|
- print()
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-exit()
|
|
|
-files_paths = []
|
|
|
-dirs = os.listdir(BASE_DIR)
|
|
|
-for dir in dirs:
|
|
|
- dir_path = os.path.join(BASE_DIR, dir)
|
|
|
- if os.path.isdir(dir_path) and (dir != "__pycache__"):
|
|
|
- files = os.listdir(dir_path)
|
|
|
- for file in files:
|
|
|
- file_path = os.path.join(BASE_DIR, dir, file)
|
|
|
- filename, fileext = os.path.splitext(file)
|
|
|
-
|
|
|
- if os.path.isfile(file_path) and (fileext=='.md'):
|
|
|
- files_paths.append(file_path)
|
|
|
-
|
|
|
-out_str = ""
|
|
|
-max_ratio = 0
|
|
|
-max_ratio_file = ""
|
|
|
-for file_1 in tqdm(files_paths):
|
|
|
- small_filename_1 = str(file_1).replace(BASE_DIR, "").strip("\\")
|
|
|
- try:
|
|
|
- with open(file_1, encoding="utf-8") as f_1:
|
|
|
- str1 = f_1.read()
|
|
|
- except:
|
|
|
- with open(file_1, encoding="cp1251") as f_1:
|
|
|
- str1 = f_1.read()
|
|
|
- f_1.close()
|
|
|
- with open(file_1, 'w', encoding="utf-8") as f_1:
|
|
|
- f_1.write(str1)
|
|
|
- f_1.close()
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
- ratio = int(SequenceMatcher(None, str1.lower(), post_html.lower()).ratio() * 100)
|
|
|
- if (ratio > 70):
|
|
|
- out_str += f"{small_filename_1}\n"
|
|
|
- out_str += f"ratio = {ratio}\n"
|
|
|
- if (ratio > max_ratio):
|
|
|
- max_ratio = ratio
|
|
|
- max_ratio_file = small_filename_1
|
|
|
+ out_str += f"количество совпадающих слов: {len(c)} / {ratio}%\n\n"
|
|
|
+
|
|
|
+if not lection_exist:
|
|
|
+ out_str += "Лекция НЕ найдена\n"
|
|
|
|
|
|
+out_str +="\n\n"
|
|
|
print(out_str)
|
|
|
-print()
|
|
|
-print(f"max ratio: {max_ratio}%")
|
|
|
-print(f"max ratio file: {max_ratio_file}")
|
|
|
-print("success")
|
|
|
+
|
|
|
+# запись лога
|
|
|
+log_path = os.path.join(BASE_DIR, "log.md")
|
|
|
+with open(log_path, "r", encoding="utf-8") as f_log:
|
|
|
+ prev_str = f_log.read()
|
|
|
+
|
|
|
+prev_str = out_str + prev_str
|
|
|
+with open(log_path, "w", encoding="utf-8") as f_log:
|
|
|
+ f_log.write(prev_str)
|
|
|
+ f_log.close()
|
|
|
+
|
|
|
|