import os
import re
import time
import requests
from selenium import webdriver
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse, urlsplit
from docker_logic import create_docker_dir, create_docker_compose, create_nginx_docker

def create_project(dir_name):
  """Create the project skeleton ``<dir_name>/public``.

  Nothing is created when ``dir_name`` already exists.  The outcome
  (already exists / created / not created / OS error) is reported on
  stdout; OSError is caught here and never propagates to the caller.
  """
  try:
    # Guard clause: an existing path is left untouched.
    if os.path.exists(dir_name):
      print(f'Папка с именем {dir_name} существует!')
      return

    print(dir_name)
    public_dir = os.path.join(dir_name, 'public')
    os.makedirs(public_dir)

    # Re-check on disk so the message reflects what is actually there.
    if not os.path.exists(public_dir):
      print(f'Не удалось создать проект {dir_name} в папке {public_dir}')
      return
    print(f'Проект {dir_name} успешно создан в папке {public_dir}')
  except OSError as error:
    print(f'Ошибка при создании проекта {dir_name}: {error}')

def _script_local_path(script_url, dir):
    """Map a script URL to its ``(folder, file_name)`` under ``<dir>/public``.

    Only the URL *path* is used, so query strings and fragments never end up
    in the file name.  Every path segment is sanitised on its own, which keeps
    the site's directory layout intact, and ``.``/``..`` segments are dropped
    so the result cannot point outside ``<dir>/public``.

    Returns ``None`` when the URL carries no file name (empty path or a
    trailing ``/``).
    """
    url_path = urlparse(script_url).path
    if not url_path or url_path.endswith('/'):
        return None
    segments = [
        re.sub(r'[<>:"\\/|?*]', '_', segment)
        for segment in url_path.split('/')
        if segment not in ('', '.', '..')
    ]
    if not segments:
        return None
    return os.path.join(dir, 'public', *segments[:-1]), segments[-1]


def process_scripts(soup, main_link, dir, timeout=30):
    """Download every external ``<script src=...>`` into ``<dir>/public``.

    Args:
        soup: Parsed document; only ``find_all("script")`` and each tag's
            ``get('src')`` are used.
        main_link: Site origin (``scheme://host``) that relative ``src``
            values are resolved against.
        dir: Project directory; files are written below ``<dir>/public``
            mirroring the path of the script URL.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        The same ``soup`` object, unmodified.

    Network and file-system errors are reported on stdout and the affected
    script is skipped, so one bad script never aborts the whole run.
    """
    for script in soup.find_all("script"):
        src = script.get('src')
        if not src:
            # Inline scripts have nothing to download.
            continue

        # urljoin covers absolute, protocol-relative, root-relative and
        # plain relative values in a single call.
        script_url = urljoin(main_link + '/', src)

        if script_url.endswith('.pdf'):
            continue

        location = _script_local_path(script_url, dir)
        if location is None:
            print(f"Failed to fetch {script_url}: URL has no file name")
            continue
        folder_path, file_name_with_ext = location

        try:
            response = requests.get(script_url, timeout=timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Failed to fetch {script_url}: {e}")
            continue

        try:
            os.makedirs(folder_path, exist_ok=True)
        except OSError as e:
            print(f"Failed to create directory {folder_path}: {e}")
            continue

        target_path = os.path.join(folder_path, file_name_with_ext)
        try:
            # Raw bytes are stored so the script is not re-encoded with a
            # charset guessed from the HTTP headers.
            with open(target_path, "wb") as js_file:
                js_file.write(response.content)
        except OSError as e:
            print(f"Failed to write file {target_path}: {e}")

    return soup

def _style_local_target(css_url, dir):
  """Map a stylesheet URL to ``(folder, file_name)`` under ``<dir>/public``.

  The folder mirrors the URL path (query string ignored, ``.``/``..``
  segments dropped so the result stays inside ``<dir>/public``) and the
  file name always carries a ``.css`` extension.  Returns ``None`` when
  the URL has no file name.
  """
  url_path = urlparse(css_url).path
  if not url_path or url_path.endswith('/'):
    return None
  segments = [
    re.sub(r'[<>:"\\/|?*]', '_', segment)
    for segment in url_path.split('/')
    if segment not in ('', '.', '..')
  ]
  if not segments:
    return None
  stem, _ = os.path.splitext(segments[-1])
  return os.path.join(dir, 'public', *segments[:-1]), f'{stem}.css'


def _download_css_assets(css_text, pattern, base_url, folder, timeout):
  """Fetch each ``url(...)`` target matched by ``pattern`` into ``folder``.

  References are resolved against ``base_url`` and stored under their bare
  file name.  ``data:`` URIs and references without a file name are
  skipped; download and write errors are printed and skipped.
  """
  for reference in re.findall(pattern, css_text):
    reference = reference.strip()
    if not reference or reference.startswith('data:'):
      continue
    asset_url = urljoin(base_url, reference)
    file_name = re.sub(r'[<>:"\\/|?*]', '_', os.path.basename(urlparse(asset_url).path))
    if not file_name:
      continue
    print(asset_url)
    try:
      response = requests.get(asset_url, timeout=timeout)
      response.raise_for_status()
    except requests.exceptions.RequestException as e:
      print(f"Failed to fetch {asset_url}: {e}")
      continue
    asset_path = os.path.join(folder, file_name)
    try:
      with open(asset_path, 'wb') as asset_file:
        asset_file.write(response.content)
    except OSError as e:
      print(f"Failed to write file {asset_path}: {e}")


def process_styles(soup, wdriver, main_link, dir, timeout=30):
  """Mirror linked stylesheets and the fonts used by the rendered page.

  Args:
    soup: Parsed document; ``<link rel="stylesheet">`` tags are read via
      ``href`` or, failing that, ``data-href``.
    wdriver: Selenium driver with the page loaded; used to read the CSS
      rules of inline and same-origin style sheets.
    main_link: Site origin (``scheme://host``) used to resolve relative
      references.
    dir: Project directory; output goes below ``<dir>/public``.
    timeout: Seconds to wait for each HTTP request.

  Returns:
    The same ``soup`` object, unmodified.

  Each stylesheet is stored at the path of its URL below ``<dir>/public``.
  Fonts referenced by the rules collected from the browser are stored in
  ``<dir>/public/static/css``.  Failed downloads and writes are printed
  and skipped instead of aborting the run.
  """
  site_root = main_link + '/'

  for link in soup.find_all("link", rel="stylesheet"):
    href = link.get('href')
    data_href = link.get('data-href')
    # Sheets served from hosts starting with "fonts" are left remote.
    if any(value and value.startswith('https://fonts') for value in (href, data_href)):
      continue
    reference = href or data_href
    if not reference:
      continue

    # urljoin handles "/x", "./x", "../x", "x", "//host/x" and absolute
    # http(s) URLs without building malformed addresses.
    css_url = urljoin(site_root, reference)
    print(css_url)

    target = _style_local_target(css_url, dir)
    if target is None:
      print(f"Failed to fetch {css_url}: URL has no file name")
      continue
    folder, file_name = target

    try:
      response = requests.get(css_url, timeout=timeout)
      response.raise_for_status()
    except requests.exceptions.RequestException as e:
      print(f"Failed to fetch {css_url}: {e}")
      continue

    css_path = os.path.join(folder, file_name)
    try:
      os.makedirs(folder, exist_ok=True)
      # Raw bytes keep whatever encoding the stylesheet declares.
      with open(css_path, "wb") as css_file:
        css_file.write(response.content)
    except OSError as e:
      print(f"Failed to write file {css_path}: {e}")
      continue

    # NOTE(review): url() targets are mirrored next to the sheet only for
    # hrefs without a leading "/", "." or "https" prefix, exactly as the
    # previous implementation did; confirm whether other sheets should get
    # the same treatment.
    if not reference.startswith(('/', '.', 'https')):
      _download_css_assets(response.text, r"url\(['\"]?(.+?)['\"]?\)", css_url, folder, timeout)

  # Ask the browser for the text of every rule in inline and same-origin
  # style sheets; sheets whose rules cannot be read contribute nothing.
  styles = wdriver.execute_script(
    """
    const styleSheets = Array.from(document.styleSheets);
    const cssRulesList = styleSheets
      .filter(sheet => !sheet.href || sheet.href.startsWith(window.location.origin))
      .flatMap(sheet => {
        try {
          return Array.from(sheet.cssRules) || [];
        } catch (e) {
          return [];
        }
      });

    return cssRulesList.map(rule => rule.cssText).join("\\n");
    """
  )

  css_dir = os.path.join(dir, 'public', "static", "css")
  os.makedirs(css_dir, exist_ok=True)
  font_url_pattern = r"url\(['\"]?([^\s'\"()]*\.(?:otf|ttf|woff|woff2))['\"]?\)"
  # The originating sheet of each rule is unknown here, so references are
  # resolved against the site root.
  _download_css_assets(styles or '', font_url_pattern, site_root, css_dir, timeout)

  return soup

def process_images(soup, main_link, dir, timeout=30):
  """Download the images referenced by ``<img>`` tags into ``<dir>/public``.

  Args:
    soup: Parsed document; ``<img>`` tags are read and, where noted below,
      have their ``src`` rewritten in place.
    main_link: Site origin (``scheme://host``) that relative ``src`` values
      are resolved against.
    dir: Project directory; files are written below ``<dir>/public``
      mirroring the path of the image URL.
    timeout: Seconds to wait for each HTTP request.

  Returns:
    The same ``soup`` object.

  Behaviour per image:
    * tags without ``src`` and ``data:image`` sources are skipped;
    * relative sources containing the text ``url`` are skipped;
    * files whose name ends in ``webp`` are stored with a ``.png``
      extension (the bytes are not converted) and ``src`` is updated;
    * absolute sources get ``src`` rewritten to the local URL path.

  A failed download or write is printed and only that image is skipped.
  """
  site_root = main_link + '/'

  for img in soup.find_all('img'):
    img_src = img.get('src')
    if not img_src:
      continue
    img_src = str(img_src)
    print(img_src)

    if img_src.startswith('data:image'):
      continue
    is_absolute = img_src.startswith(('http://', 'https://', '//'))
    if not is_absolute and 'url' in img_src:
      continue

    img_full_link = urljoin(site_root, img_src)
    url_path = urlparse(img_full_link).path
    # Drop empty and dot segments so the file stays inside <dir>/public.
    segments = [part for part in url_path.split('/') if part not in ('', '.', '..')]
    if not segments or url_path.endswith('/'):
      continue

    file_name = segments[-1]
    output_file_name = file_name
    if file_name.endswith('webp'):
      output_file_name = f"{os.path.splitext(file_name)[0]}.png"

    try:
      response = requests.get(img_full_link, timeout=timeout)
      response.raise_for_status()
    except requests.exceptions.RequestException as e:
      print(f"Failed to fetch {img_full_link}: {e}")
      continue

    folder = os.path.join(dir, 'public', *segments[:-1])
    image_path = os.path.join(folder, output_file_name)
    try:
      os.makedirs(folder, exist_ok=True)
      with open(image_path, 'wb') as file:
        file.write(response.content)
    except OSError as e:
      print(f"Failed to write file {image_path}: {e}")
      continue

    # The tag is only touched once the local copy exists.
    if is_absolute:
      img['src'] = url_path.replace(file_name, output_file_name)
    elif output_file_name != file_name:
      img['src'] = img_src.replace(file_name, output_file_name)

  return soup

def save_soup_to_file(soup, dir, page):
  """Write the prettified document to ``<dir>/public/<page>.html`` as UTF-8.

  The ``<dir>/public`` directory must already exist; an existing file with
  the same name is overwritten.
  """
  html_path = f"{dir}/public/{page}.html"
  with open(html_path, "w", encoding="utf-8") as html_file:
    markup = str(soup.prettify())
    html_file.write(markup)
  
def get_page(link, dir, page, main_link):
  """Render ``link`` in Firefox and mirror the page into ``<dir>/public``.

  Args:
    link: URL opened in the browser.
    dir: Project directory that receives the downloaded assets and HTML.
    page: File name (without extension) for the saved HTML document.
    main_link: Site origin passed to the asset processors for resolving
      relative references.

  The browser is always shut down, even when one of the processing steps
  raises; the exception itself still propagates to the caller.
  """
  wdriver = webdriver.Firefox()
  try:
    wdriver.get(link)
    # Fixed pause before the page source is read, giving the page time to
    # finish loading.
    time.sleep(10)
    content = wdriver.page_source
    soup = BeautifulSoup(content, "html.parser")
    process_scripts(soup, main_link, dir)
    process_styles(soup, wdriver, main_link, dir)
    process_images(soup, main_link, dir)
    save_soup_to_file(soup, dir, page)
  finally:
    # Without this a failure above would leave the Firefox process running.
    wdriver.quit()

  print("Копирование сайта завершено.")
    
    
def main():
  """Interactive entry point: ask for a URL and a page name, then mirror it.

  The project directory is named after the URL's host.  When the entered
  text has no host part, its path (without surrounding slashes) is used as
  the directory name instead.  The project skeleton and Docker files are
  created first, then the page is downloaded.
  """
  web_link = input('Введите ссылку на страницу: ')
  url_parts = urlsplit(web_link)
  main_link = f"{url_parts.scheme}://{url_parts.netloc}"
  page_name = input('Введите название страницы: ')

  host = url_parts.netloc
  link = host if host != '' else url_parts.path.strip('/')

  # Setup steps run strictly in this order; each gets its own arguments.
  steps = (
    (create_project, (link,)),
    (create_docker_dir, (link,)),
    (create_docker_compose, (link, host)),
    (create_nginx_docker, (link, page_name)),
    (get_page, (web_link, link, page_name, main_link)),
  )
  for step, arguments in steps:
    step(*arguments)
    
    
# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
  main()