ypv hai 2 anos
pai
achega
2d4d195c51
Modificáronse 2 ficheiros con 35 adicións e 30 borrados
  1. 32 30
      plagiat_1_full.v2.py
  2. 3 0
      ПКС/2022-23/README.md

+ 32 - 30
plagiat_1_full.v2.py

@@ -20,11 +20,11 @@ from tqdm import tqdm
 BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
 # проверяемая директория
 # LECTION_DIR = os.path.join("ISRPO", "Лекции")
-LECTION_DIR = os.path.join("EASvZI", "Лекции")
-# LECTION_DIR = os.path.join("TZI", "Лекции", "ПМ3.2")
+# LECTION_DIR = os.path.join("EASvZI", "Лекции")
+LECTION_DIR = os.path.join("TZI", "Лекции", "ПМ3.2")
 
 # ссылка для проверки
-url = "http://213.155.192.79:3001/u20lebed/ISRPO/raw/88f913f3d1af9ee9bed0468b183ec9d4f6e5f2b4/2022-23/%d0%ad%d0%ba%d0%b7%d0%b0%d0%bc%d0%b5%d0%bd/Lebedkin.md"
+url = "http://213.155.192.79:3001/u19-23cukanov/TZI/raw/b722d7d9874f8245f669ee85687b441489281033/2022-23/%d0%94%d0%b8%d1%84.%d0%b7%d0%b0%d1%87%d0%b5%d1%82_2%d1%81%d0%b5%d0%bc/%d0%a6%d1%83%d0%ba%d0%b0%d0%bd%d0%be%d0%b2_17_%d0%b1%d0%b8%d0%bb%d0%b5%d1%82.md"
 
 # ------------------------------- / НАСТРОЙКИ ------------
 
@@ -103,35 +103,37 @@ readme_list = readme_html.split("\n")
 for readme_str in tqdm(readme_list):
     if '[' in readme_str:
         # print(f"проверяю {readme_str}")
-        variant_name, t = readme_str.split("]")
-        variant_name = variant_name.strip("[")
-        t, variant_uri = readme_str.split("(")
-        variant_uri = variant_uri.replace("),", "")
-        variant_uri = variant_uri.replace(")", "")
-        variant_uri = variant_uri.strip()
-        
-        variant_path = os.path.join(BASE_DIR, LECTION_DIR, variant_uri)
         try:
-            with open(variant_path, encoding="utf-8") as f:
-                variant_html = f.read()
+            variant_name, t = readme_str.split("]")
+            variant_name = variant_name.strip("[")
+            t, variant_uri = readme_str.split("(")
+            variant_uri = variant_uri.replace("),", "")
+            variant_uri = variant_uri.replace(")", "")
+            variant_uri = variant_uri.strip()
+            
+            variant_path = os.path.join(BASE_DIR, LECTION_DIR, variant_uri)
+            try:
+                with open(variant_path, encoding="utf-8") as f:
+                    variant_html = f.read()
+            except:
+                with open(variant_path, encoding="cp1251") as f:
+                    variant_html = f.read()
+
+            variant_tokens, variant_uniq_text = preprocess_text(variant_html)
+            # print(f"количество уникальных слов варианта: {len(set(variant_tokens))}")
+
+            # пересечение множеств 
+            min_tokens_len = min([len(set(post_tokens)), len(set(variant_tokens))])
+            c = list(set(post_tokens) & set(variant_tokens))
+            ratio = (1 - (len(c) / min_tokens_len)) * 100
+            if ratio < min_ratio:
+                min_ratio = ratio
+                min_ratio_file = variant_path
+
+            # print(f"количество совпадающих слов: {len(c)} / {ratio:.2f}%")
+            # print()
         except:
-            with open(variant_path, encoding="cp1251") as f:
-                variant_html = f.read()
-
-        variant_tokens, variant_uniq_text = preprocess_text(variant_html)
-        # print(f"количество уникальных слов варианта: {len(set(variant_tokens))}")
-
-        # пересечение множеств 
-        min_tokens_len = min([len(set(post_tokens)), len(set(variant_tokens))])
-        c = list(set(post_tokens) & set(variant_tokens))
-        ratio = (1 - (len(c) / min_tokens_len)) * 100
-        if ratio < min_ratio:
-            min_ratio = ratio
-            min_ratio_file = variant_path
-
-        # print(f"количество совпадающих слов: {len(c)} / {ratio:.2f}%")
-        # print()
-
+            print(f"Ошибка распаковки {readme_str}")
 
 print()
 print(f"min_ratio: {min_ratio}%")

+ 3 - 0
ПКС/2022-23/README.md

@@ -44,6 +44,9 @@ https://habr.com/ru/post/712008/
 ██    ██   ██   ██      ██       ██   ██     ███   ██████     ██████
 
 
+15Mar23 Ср. 2П
+**Отсутств:** Лапченко,  
+
 01Mar23 Ср. 2П
 **Отсутств:** Лисеенко, Баранов