|
@@ -11,18 +11,19 @@ from nltk.corpus import stopwords
|
|
|
import pymorphy2
|
|
|
from string import punctuation
|
|
|
|
|
|
+from thefuzz import fuzz
|
|
|
|
|
|
# ------------------------------- НАСТРОЙКИ ------------
|
|
|
# директория файла (на уровень выше, для структуры репозиториев 2 сем. 2022-23)
|
|
|
BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
|
|
|
# проверяемая директория
|
|
|
-LECTION_DIR = os.path.join("EASvZI", "Лекции")
|
|
|
-# LECTION_DIR = os.path.join("TZI", "Лекции", "ПМ3.2")
|
|
|
+# LECTION_DIR = os.path.join("EASvZI", "Лекции")
|
|
|
+LECTION_DIR = os.path.join("TZI", "Лекции", "ПМ3.2")
|
|
|
|
|
|
# кого проверяем
|
|
|
who = "Савкин"
|
|
|
# ссылка для проверки
|
|
|
-url = "http://213.155.192.79:3001/ypv/EASvZI/raw/master/%d0%9b%d0%b5%d0%ba%d1%86%d0%b8%d0%b8/1.6.100_%d0%9c%d0%b5%d1%85%d0%b0%d0%bd%d0%b8%d0%b7%d0%bc%d1%8b_%d0%b8_%d0%bc%d0%b5%d1%82%d0%be%d0%b4%d1%8b_%d0%b7%d0%b0%d1%89%d0%b8%d1%82%d1%8b_%d0%b8%d0%bd%d1%84%d0%be%d1%80%d0%bc%d0%b0%d1%86%d0%b8%d0%b8_%d0%b2_%d1%80%d0%b0%d1%81%d0%bf%d1%80%d0%b5%d0%b4%d0%b5%d0%bb%d0%b5%d0%bd%d0%bd%d1%8b%d1%85_%d0%90%d0%a1/zurov.md"
|
|
|
+url = "http://213.155.192.79:3001/ypv/TZI/raw/master/%d0%9b%d0%b5%d0%ba%d1%86%d0%b8%d0%b8/%d0%9f%d0%9c3.2/%d0%9f2.2.100_%d0%9f%d1%80%d0%b8%d0%bd%d1%86%d0%b8%d0%bf%d1%8b_%d1%83%d1%81%d1%82%d1%80%d0%be%d0%b9%d1%81%d1%82%d0%b2%d0%b0_%d1%80%d0%b0%d0%b1%d0%be%d1%82%d1%8b_%d0%bf%d1%80%d0%b8%d0%bc%d0%b5%d0%bd%d0%b5%d0%bd%d0%b8%d1%8f_%d1%81%d1%80%d0%b5%d0%b4%d1%81%d1%82%d0%b2_%d0%b0%d1%83%d1%82%d0%b5%d0%bd%d1%82%d0%b8%d1%84%d0%b8%d0%ba%d0%b0%d1%86%d0%b8%d0%b8_%d0%bf%d0%be%d0%bb%d1%8c%d0%b7%d0%be%d0%b2%d0%b0%d1%82%d0%b5%d0%bb%d1%8f/u19-23Cherkasov.md"
|
|
|
|
|
|
# ------------------------------- / НАСТРОЙКИ ------------
|
|
|
|
|
@@ -85,7 +86,7 @@ post_list = post_html.split("\n")
|
|
|
header_exist = True
|
|
|
line_1 = post_list[0]
|
|
|
if (line_1[0:2]) != "# ":
|
|
|
- print("Заголовок статьи не найден")
|
|
|
+    print(f"Заголовок статьи не найден: {' '.join(str(ord(ch)) for ch in line_1[:2])} вместо {ord('#')} {ord(' ')}")  # join over the slice: ord('') raises TypeError when the first line has fewer than 2 chars
|
|
|
header_exist = False
|
|
|
|
|
|
# наличие вопросов и списка литературы
|
|
@@ -123,18 +124,29 @@ readme_list = readme_html.split("\n")
|
|
|
for readme_str in readme_list:
|
|
|
readme_str = readme_str.strip()
|
|
|
readme_str_list = readme_str.split(" ")
|
|
|
+    lection_number = readme_str_list[0]  # first space-separated token of the README line; within this hunk it is only read by the disabled debug dump below
|
|
|
readme_str_list.pop(0)
|
|
|
name_str = " ".join(readme_str_list)
|
|
|
name_str = name_str.replace(".", "")
|
|
|
-
|
|
|
- if (str(name_str) == str(header_text)):
|
|
|
- print("Лекция найдена в readme")
|
|
|
- lection_exist = True
|
|
|
- in_lections = True
|
|
|
-
|
|
|
- post_tokens, post_uniq_text = preprocess_text(post_html)
|
|
|
- print(f"количество уникальных слов: {len(set(post_tokens))}")
|
|
|
+ name_str = name_str.strip()
|
|
|
+
|
|
|
+    if len(name_str)>0:  # skip README lines where nothing is left after the first token (dots removed, whitespace stripped)
|
|
|
+ """
|
|
|
+ print(lection_number)
|
|
|
+ print(name_str)
|
|
|
+ print(header_text)
|
|
|
+ print(f"{ord(name_str[0:1])} {ord(name_str[1:2])} {ord(name_str[2:3])} вместо {ord(header_text[0:1])} {ord(header_text[1:2])} {ord(header_text[2:3])}")
|
|
|
+ print(fuzz.partial_ratio(name_str, header_text))
|
|
|
print()
|
|
|
+ """
|
|
|
+ if (str(name_str) == str(header_text)):
|
|
|
+ print("Лекция найдена в readme")
|
|
|
+ lection_exist = True
|
|
|
+ in_lections = True
|
|
|
+
|
|
|
+ post_tokens, post_uniq_text = preprocess_text(post_html)
|
|
|
+ print(f"количество уникальных слов: {len(set(post_tokens))}")
|
|
|
+ print()
|
|
|
|
|
|
|
|
|
|