1
0

safe_request.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
import hashlib
import json
import os
import random
import time
import urllib
import urllib.parse

import requests
  6. def get_url_html(url, use_cache=False, CACHE_DIR = os.path.join("D:\\", "cache")):
  7. url_hash = hashlib.md5(url.encode('utf-8')).hexdigest()
  8. hash_path = os.path.join(CACHE_DIR, f"{url_hash}.html")
  9. cache_not_exist = False
  10. if use_cache:
  11. if os.path.isfile(hash_path):
  12. with open(hash_path, 'r', encoding="utf-8") as f:
  13. post_html = f.read()
  14. else:
  15. cache_not_exist = True
  16. if not(use_cache) or cache_not_exist:
  17. is_not_exec = True
  18. sleep_time = 1
  19. while is_not_exec:
  20. try:
  21. user_agent = random.choice(user_agents)
  22. host = urllib.parse.urlparse(url).netloc
  23. referer = f"https://{host}/"
  24. headers = {
  25. "Host": str(host),
  26. 'User-Agent': str(user_agent),
  27. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
  28. 'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
  29. 'Accept-Encoding': 'gzip, deflate, br',
  30. 'Referer': str(referer),
  31. 'Upgrade-Insecure-Requests': '1',
  32. 'Connection': 'keep-alive'}
  33. response = requests.get(url, headers=headers)
  34. #response.encoding='windows-1251'
  35. is_not_exec = False
  36. except:
  37. print(f"{url} -> Ошибка сайта донора, ждем {sleep_time} с.")
  38. time.sleep(sleep_time)
  39. sleep_time += 1
  40. post_html = response.text
  41. if use_cache:
  42. with open(hash_path, 'w', encoding="utf-8") as f:
  43. f.write(post_html)
  44. return post_html
  45. def get_url_content(url):
  46. is_not_exec = True
  47. sleep_time = 1
  48. while is_not_exec:
  49. try:
  50. user_agent = random.choice(user_agents)
  51. referer = user_agent
  52. host = urllib.parse.urlparse(url).netloc
  53. headers = {
  54. "Host": str(host),
  55. 'User-Agent': str(user_agent),
  56. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
  57. 'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
  58. 'Accept-Encoding': 'gzip, deflate, br',
  59. 'Referer': str(referer),
  60. 'Upgrade-Insecure-Requests': '1',
  61. 'Connection': 'keep-alive'}
  62. response = requests.get(url, headers=headers)
  63. #response.encoding='windows-1251'
  64. is_not_exec = False
  65. except:
  66. # print(f"\nОшибка сайта донора контента, ждем {sleep_time} с.")
  67. time.sleep(sleep_time)
  68. sleep_time += 1
  69. post_content = response.content
  70. return post_content
  71. user_agents = [
  72. "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:45.0) Gecko/20100101 Firefox/45.0",
  73. "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0",
  74. "Mozilla/5.0 (Windows NT 5.1; rv:23.0) Gecko/20100101 Firefox/23.0",
  75. "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36",
  76. "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
  77. "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
  78. "Mozilla/5.0 (Windows NT 6.1; rv:23.0) Gecko/20100101 Firefox/23.0",
  79. "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36",
  80. "Opera/9.80 (Windows NT 5.1) Presto/2.12.388 Version/12.16",
  81. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36",
  82. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.172 YaBrowser/1.7.1364.21027 Safari/537.22",
  83. "Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.16",
  84. "Mozilla/5.0 (iPad; CPU OS 6_1_3 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10B329 Safari/8536.25",
  85. "Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.15",
  86. "Mozilla / 5.0 (Macintosh; Intel Mac OS X 10.14; rv: 75.0) Gecko / 20100101 Firefox / 75.0",
  87. "Mozilla / 5.0 (Windows NT 6.1; Win64; x64; rv: 74.0) Gecko / 20100101 Firefox / 74.0",
  88. "Mozilla / 5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit / 537.36 (KHTML, как Gecko) Chrome / 80.0.3987.163 Safari / 537.36",
  89. "Dalvik/2.1.0 (Linux; U; Android 10; Mi 9T MIUI/V12.0.5.0.QFJMIXM)"
  90. ]