|
@@ -8,7 +8,7 @@ import requests
|
|
|
import nltk
|
|
|
#nltk.download("stopwords")
|
|
|
from nltk.corpus import stopwords
|
|
|
-import pymorphy3
|
|
|
+import pymorphy2
|
|
|
from string import punctuation
|
|
|
|
|
|
|
|
@@ -19,12 +19,12 @@ BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
|
|
|
LECTION_DIR = os.path.join("EASvZI", "Лекции")
|
|
|
|
|
|
# ссылка для проверки
|
|
|
-url = "http://213.155.192.79:3001/u20-24zurov/EASvZI/raw/c3265c710cc842b0c2c670a84a0b6a435d708e5e/%d0%9b%d0%b5%d0%ba%d1%86%d0%b8%d0%b8/1.4.120_%d0%9f%d1%80%d0%b0%d0%b2%d0%be%d0%b2%d1%8b%d0%b5_%d0%bc%d0%b5%d1%80%d1%8b_%d0%97%d0%98_%d0%b2_%d0%90%d0%a1/Zurov.md"
|
|
|
+url = "http://213.155.192.79:3001/u21-25cupikov/EASvZI/src/0bf089507dd73305a822c6358ab062fa306f761e/%d0%9b%d0%b5%d0%ba%d1%86%d0%b8%d0%b8/1.3.300_%d0%9a%d1%80%d0%b8%d1%82%d0%b5%d1%80%d0%b8%d0%b8_%d0%ba%d0%bb%d0%b0%d1%81%d1%81%d0%b8%d1%84%d0%b8%d0%ba%d0%b0%d1%86%d0%b8%d0%b8_%d1%83%d0%b3%d1%80%d0%be%d0%b7/%d0%a6%d1%83%d0%bf%d0%b8%d0%ba%d0%be%d0%b2.md"
|
|
|
|
|
|
# ------------------------------- / НАСТРОЙКИ ------------
|
|
|
|
|
|
|
|
|
-
|
|
|
+url = url.replace("/src/", "/raw/", 1)  # turn the repo "src" page link into the "raw" file link; anchored on the path segment and limited to the first match so a user/repo/file name containing "src" is left intact
|
|
|
|
|
|
|
|
|
|
|
@@ -36,7 +36,7 @@ url = "http://213.155.192.79:3001/u20-24zurov/EASvZI/raw/c3265c710cc842b0c2c670a
|
|
|
|
|
|
|
|
|
#Create lemmatizer and stopwords list
|
|
|
-morph = pymorphy3.MorphAnalyzer()
|
|
|
+morph = pymorphy2.MorphAnalyzer()  # NOTE(review): pymorphy2 is no longer maintained and reportedly fails on Python 3.11+ (it relies on the removed inspect.getargspec) - confirm the target interpreter before moving off pymorphy3
|
|
|
russian_stopwords = stopwords.words("russian")
|
|
|
|
|
|
#Preprocess function
|