|
@@ -1,142 +0,0 @@
|
|
|
-import os
|
|
|
-from difflib import SequenceMatcher
|
|
|
-from tqdm import tqdm
|
|
|
-import datetime
|
|
|
-import requests
|
|
|
-
|
|
|
-# download stopwords corpus, you need to run it once
|
|
|
-import nltk
|
|
|
-#nltk.download("stopwords")
|
|
|
-from nltk.corpus import stopwords
|
|
|
-import pymorphy2
|
|
|
-from string import punctuation
|
|
|
-
|
|
|
# Create the lemmatizer and the Russian stopword list once at import time;
# both are shared by every preprocess_text() call below.
morph = pymorphy2.MorphAnalyzer()
russian_stopwords = stopwords.words("russian")
|
|
|
-
|
|
|
# Preprocess function
def preprocess_text(text):
    """Tokenize, clean and lemmatize Russian *text*.

    ASCII punctuation is replaced with spaces in one pass, the text is
    lower-cased and split into words, stopwords and punctuation-only tokens
    are dropped, and each remaining token is reduced to its normal form
    with pymorphy2.

    Returns:
        tuple: (tokens, text) where *tokens* is the list of lemmatized
        tokens and *text* is those tokens joined with single spaces.
    """
    # Map every ASCII punctuation character to a space (single C-level pass).
    translator = str.maketrans(punctuation, ' ' * len(punctuation))
    words = text.translate(translator).lower().split()

    # BUGFIX/cleanup: the original then re-scanned each word, copying it
    # character by character while skipping punctuation — but translate()
    # above already removed every character in `punctuation`, so that loop
    # was dead code (and quadratic string building); it is removed here.
    # NOTE(review): non-ASCII punctuation («», —, …) is untouched, exactly
    # as in the original.
    tokens = [
        morph.parse(token)[0].normal_form
        for token in words
        if token not in russian_stopwords
        and token != " "
        and token.strip() not in punctuation
    ]

    text = " ".join(tokens)
    return tokens, text
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
# Directory containing this script; every file path below is resolved
# relative to it.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))

# URL of the submitted lecture (raw Markdown) to be checked.
url = "http://213.155.192.79:3001/ypv/up/raw/master/%d0%ad%d0%90%d0%a1%d0%b2%d0%97%d0%98/%d0%9b%d0%b5%d0%ba%d1%86%d0%b8%d0%b8/1.3.300_%d0%9a%d1%80%d0%b8%d1%82%d0%b5%d1%80%d0%b8%d0%b8_%d0%ba%d0%bb%d0%b0%d1%81%d1%81%d0%b8%d1%84%d0%b8%d0%ba%d0%b0%d1%86%d0%b8%d0%b8_%d1%83%d0%b3%d1%80%d0%be%d0%b7/Doc.md"
# Author/reviewer name, embedded in the report header.
who = "Савкин С."

# Report accumulator: header line with reviewer name and timestamp;
# everything below appends to out_str and it is printed/logged at the end.
now = datetime.datetime.now().strftime('%d-%m-%Y %H:%M')
out_str = f"Проверка: {who}, время проверки: {now} \n"

# Download the submitted lecture.
# NOTE(review): no timeout and no status-code check — a network failure or
# HTTP error surfaces as an exception / non-Markdown body here; confirm
# whether that is acceptable for this script's usage.
response = requests.get(url)
post_html = response.text

# The document as a list of lines; post_list[0] is expected to be the title.
post_list = post_html.split("\n")
|
|
|
-
|
|
|
# Validate that the first line of the post is a Markdown H1 title.
line_1 = post_list[0]
# BUGFIX: the original tested line_1[0] != "#", which raises IndexError
# when the document starts with an empty line; startswith() is safe and
# behaves identically on non-empty input (empty line -> "no title" report).
if not line_1.startswith("#"):
    out_str += "Заголовок статьи не найден\n"

# Normalized title text: "# " marker, dots and surrounding whitespace
# removed, so it can be compared against README entries normalized the
# same way.
header_text = line_1.replace("# ", "")
header_text = header_text.replace(".", "")
header_text = header_text.strip()
|
|
|
-
|
|
|
# Look for other lectures on the same topic in the repository README.
readme_path = os.path.join(BASE_DIR, "README.md")
try:
    with open(readme_path, encoding="utf-8") as f:
        readme_html = f.read()
except UnicodeDecodeError:
    # Some repo files are saved in the legacy Windows encoding.
    # BUGFIX: narrowed from a bare `except:`, which also silently retried
    # on unrelated errors (e.g. a missing README would surface as a
    # confusing second FileNotFoundError from this fallback branch).
    with open(readme_path, encoding="cp1251") as f:
        readme_html = f.read()
|
|
|
-
|
|
|
# Scan the README line by line: find the lecture title, then process the
# list of alternative variants that immediately follows it.
lection_exist = False   # True once the lecture title has been found; never reset
in_variants = False     # True only while scanning the variant list under the title
readme_list = readme_html.split("\n")
for readme_str in readme_list:
    readme_str = readme_str.strip()
    # Drop the leading numbering/bullet token and normalize the remainder
    # the same way header_text was normalized (dots removed).
    readme_str_list = readme_str.split(" ")
    readme_str_list.pop(0)
    name_str = " ".join(readme_str_list)
    name_str = name_str.replace(".", "")

    if name_str == header_text:
        out_str += "Лекция найдена\n"
        lection_exist = True
        in_variants = True

        post_tokens, post_uniq_text = preprocess_text(post_html)
        out_str += f"количество уникальных слов: {len(set(post_tokens))}\n\n"

    # The variant list ends at the first empty line after the title.
    # BUGFIX: the original reused `lection_exist` both as the "found" flag
    # and the "inside the variant list" flag and reset it to False here, so
    # the final "Лекция НЕ найдена" report fired even when the lecture HAD
    # been found; a separate flag keeps the two meanings apart.
    if in_variants:
        if readme_str == "":
            in_variants = False

        # Lines after the title are Markdown links: "[name](relative/path),".
        if in_variants and name_str != header_text:
            # NOTE(review): assumes exactly one "]" and one "(" per line;
            # a malformed README line still raises ValueError here, as in
            # the original.
            variant_name, t = readme_str.split("]")
            variant_name = variant_name.strip("[")
            out_str += f"проверяю {variant_name}\n"
            t, variant_uri = readme_str.split("(")
            variant_uri = variant_uri.replace("),", "")
            variant_uri = variant_uri.strip()

            variant_path = os.path.join(BASE_DIR, variant_uri)
            try:
                with open(variant_path, encoding="utf-8") as f:
                    variant_html = f.read()
            except UnicodeDecodeError:
                # BUGFIX: narrowed from a bare `except:` — only fall back
                # to the legacy Windows encoding on a decode failure.
                with open(variant_path, encoding="cp1251") as f:
                    variant_html = f.read()

            variant_tokens, variant_uniq_text = preprocess_text(variant_html)
            out_str += f"количество уникальных слов варианта: {len(set(variant_tokens))}\n"

            # Overlap of the unique-token sets of the post and the variant.
            c = list(set(post_tokens) & set(variant_tokens))
            # NOTE(review): `ratio` is the fraction of post tokens NOT
            # shared (0..1), yet the report prints it with a "%" suffix —
            # kept byte-for-byte to preserve the existing report format;
            # confirm the intended metric with the author.
            ratio = 1 - (len(c) / len(set(post_tokens)))
            out_str += f"количество совпадающих слов: {len(c)} / {ratio}%\n\n"
|
|
|
-
|
|
|
# If the lecture title was never matched in the README, report that.
if not lection_exist:
    out_str += "Лекция НЕ найдена\n"

# Blank separator between runs in the log, then echo the report to stdout.
out_str +="\n\n"
print(out_str)
|
|
|
-
|
|
|
# Write the log: prepend this run's report so the newest entry is on top.
log_path = os.path.join(BASE_DIR, "log.md")
try:
    with open(log_path, "r", encoding="utf-8") as f_log:
        prev_str = f_log.read()
except FileNotFoundError:
    # BUGFIX: on the very first run log.md does not exist yet and the
    # original crashed here; start with an empty history instead.
    prev_str = ""

prev_str = out_str + prev_str
with open(log_path, "w", encoding="utf-8") as f_log:
    f_log.write(prev_str)
    # (removed the original's explicit f_log.close(): redundant inside
    # a with-block, which closes the file on exit)
|
|
|
-
|
|
|
-
|