2 年之前 · 92cea2f905
--- a/ЭАСвЗИ/Лекции/plagiat_1.py
+++ b/ЭАСвЗИ/Лекции/plagiat_1.py
@@ -43,11 +43,14 @@ def preprocess_text(text):
 
				 # директория файла
			
 
				 BASE_DIR = os.path.abspath(os.path.dirname(__file__))
			
 
				 
			
 
				-
			
 
				 # ссылка для проверки
			
 
				 url = "http://213.155.192.79:3001/ypv/up/raw/master/%d0%ad%d0%90%d0%a1%d0%b2%d0%97%d0%98/%d0%9b%d0%b5%d0%ba%d1%86%d0%b8%d0%b8/1.3.300_%d0%9a%d1%80%d0%b8%d1%82%d0%b5%d1%80%d0%b8%d0%b8_%d0%ba%d0%bb%d0%b0%d1%81%d1%81%d0%b8%d1%84%d0%b8%d0%ba%d0%b0%d1%86%d0%b8%d0%b8_%d1%83%d0%b3%d1%80%d0%be%d0%b7/Doc.md"
			
 
				+who = "Савкин С."
			
 
				+
			
 
				 
			
 
				-print()
			
 
				+
			
 
				+now = datetime.datetime.now().strftime('%d-%m-%Y %H:%M')
			
 
				+out_str = f"Проверка: {who}, время проверки: {now} \n"
			
 
				 
			
 
				 response = requests.get(url)
			
 
				 post_html = response.text
			
@@ -57,7 +60,7 @@ post_list = post_html.split("\n")
 
				 # проверяем правильность оформления 1й строки
			
 
				 line_1 = post_list[0]
			
 
				 if (line_1[0]) != "#":
			
 
				-    print("Заголовок статьи не найден")
			
 
				+    out_str += "Заголовок статьи не найден\n"
			
 
				 
			
 
				 header_text = line_1.replace("# ", "")
			
 
				 header_text = header_text.replace(".", "")
			
@@ -82,12 +85,11 @@ for readme_str in readme_list:
 
				     name_str = name_str.replace(".", "")
			
 
				 
			
 
				     if (str(name_str) == str(header_text)):
			
 
				-        print("Лекция найдена")
			
 
				+        out_str += "Лекция найдена\n"
			
 
				         lection_exist = True
			
 
				 
			
 
				         post_tokens, post_uniq_text = preprocess_text(post_html)
			
 
				-        print(f"количество уникальных слов: {len(set(post_tokens))}")
			
 
				-        print()
			
 
				+        out_str += f"количество уникальных слов: {len(set(post_tokens))}\n\n"
			
 
				 
			
 
				 
			
 
				 
			
@@ -100,7 +102,7 @@ for readme_str in readme_list:
 
				     if lection_exist and (str(name_str) != str(header_text)):
			
 
				         variant_name, t = readme_str.split("]")
			
 
				         variant_name = variant_name.strip("[")
			
 
				-        print(f"проверяю {variant_name}")
			
 
				+        out_str += f"проверяю {variant_name}\n"
			
 
				         t, variant_uri = readme_str.split("(")
			
 
				         variant_uri = variant_uri.replace("),", "")
			
 
				         variant_uri = variant_uri.strip()
			
@@ -114,60 +116,27 @@ for readme_str in readme_list:
 
				                 variant_html = f.read()
			
 
				 
			
 
				         variant_tokens, variant_uniq_text = preprocess_text(variant_html)
			
 
				-        print(f"количество уникальных слов варианта: {len(set(variant_tokens))}")
			
 
				+        out_str += f"количество уникальных слов варианта: {len(set(variant_tokens))}\n"
			
 
				 
			
 
				         # пересечение множеств 
			
 
				         c = list(set(post_tokens) & set(variant_tokens))
			
 
				         ratio = 1 - (len(c) / len(set(post_tokens)))
			
 
				-        print(f"количество совпадающих слов: {len(c)} / {ratio}%")
			
 
				-        print()
			
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-exit()
			
 
				-files_paths = []
			
 
				-dirs = os.listdir(BASE_DIR)
			
 
				-for dir in dirs:
			
 
				-    dir_path = os.path.join(BASE_DIR, dir)
			
 
				-    if os.path.isdir(dir_path) and (dir != "__pycache__"):
			
 
				-        files = os.listdir(dir_path)
			
 
				-        for file in files:
			
 
				-            file_path = os.path.join(BASE_DIR, dir, file)
			
 
				-            filename, fileext = os.path.splitext(file)
			
 
				-
			
 
				-            if os.path.isfile(file_path) and (fileext=='.md'):
			
 
				-                files_paths.append(file_path)
			
 
				-
			
 
				-out_str = ""
			
 
				-max_ratio = 0
			
 
				-max_ratio_file = ""
			
 
				-for file_1 in tqdm(files_paths):
			
 
				-    small_filename_1 = str(file_1).replace(BASE_DIR, "").strip("\\")
			
 
				-    try:
			
 
				-        with open(file_1, encoding="utf-8") as f_1:
			
 
				-            str1 = f_1.read()
			
 
				-    except:
			
 
				-        with open(file_1, encoding="cp1251") as f_1:
			
 
				-            str1 = f_1.read()
			
 
				-            f_1.close()
			
 
				-        with open(file_1, 'w', encoding="utf-8") as f_1:
			
 
				-            f_1.write(str1)
			
 
				-            f_1.close()
			
 
				-                    
			
 
				-
			
 
				-
			
 
				-    ratio = int(SequenceMatcher(None, str1.lower(), post_html.lower()).ratio() * 100)
			
 
				-    if (ratio > 70):
			
 
				-        out_str += f"{small_filename_1}\n"
			
 
				-        out_str += f"ratio = {ratio}\n"
			
 
				-    if (ratio > max_ratio):
			
 
				-        max_ratio = ratio
			
 
				-        max_ratio_file = small_filename_1
			
 
				+        out_str += f"количество совпадающих слов: {len(c)} / {ratio}%\n\n"
			
 
				+
			
 
				+if not lection_exist:
			
 
				+    out_str += "Лекция НЕ найдена\n"    
			
 
				 
			
 
				+out_str +="\n\n"
			
 
				 print(out_str)
			
 
				-print()
			
 
				-print(f"max ratio: {max_ratio}%")
			
 
				-print(f"max ratio file: {max_ratio_file}")
			
 
				-print("success")
			
 
				+
			
 
				+# запись лога
			
 
				+log_path = os.path.join(BASE_DIR, "log.md")
			
 
				+with open(log_path, "r", encoding="utf-8") as f_log:
			
 
				+    prev_str = f_log.read()
			
 
				+
			
 
				+prev_str = out_str + prev_str
			
 
				+with open(log_path, "w", encoding="utf-8") as f_log:
			
 
				+    f_log.write(prev_str)
			
 
				+    f_log.close()
			
 
				+