An error occurred while loading the file. Please try again.
-
evwng authored806f37b7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import math
import re
# Given the HTML of a Superprof page, return a list of teacher tuples.
def get_teacher_list(html):
    """Extract the rated teachers from a Superprof listing page, best first.

    Every ``<li class="container see">`` card in *html* is converted into a
    tuple ``(link, name, price, rating, review_count)``. A card is skipped
    when any element needed to build that tuple is missing (link, name,
    price, rating or review count), so one malformed card does not abort
    the whole parse.

    Args:
        html: Markup of the listing page, as a string.

    Returns:
        A list of ``(link, name, price, rating, review_count)`` tuples,
        sorted in descending order of the score given by ``orden_ratings``.

    Raises:
        ValueError: If a price or rating element is present but its text is
            not a plain number once the currency formatting is stripped.
    """
    soup = BeautifulSoup(html, 'lxml')
    # Compiled once here instead of on every loop iteration.
    # NOTE(review): this only matches the plural "opiniones"; a card whose
    # text uses a singular form would be skipped -- confirm against the site.
    review_count_pattern = re.compile(r'\(\d+ opiniones\)')

    teachers = []
    for card in soup.find_all('li', class_='container see'):
        anchor = card.find('a')
        name_tag = card.find('div', {'data-announcement-name': True})
        pricing = card.find('li', class_='pricing')
        price_tag = None
        if pricing is not None:
            price_tag = pricing.find('span', class_='text')
        rating_tag = card.find('span', class_='emphasis')
        reviews_tag = card.find('span', string=review_count_pattern)

        # find() returns None when nothing matches; skip incomplete cards
        # instead of failing on a None subscript / attribute access.
        required = (anchor, name_tag, price_tag, rating_tag, reviews_tag)
        if any(tag is None for tag in required):
            continue

        link = anchor.get('href')
        if link is None:
            continue

        name = name_tag['data-announcement-name']
        # Strip the currency symbol, the "." separators and the "/hr" suffix
        # so that only the digits of the hourly price remain.
        price_text = (
            price_tag.get_text()
            .replace("$", "")
            .replace(".", "")
            .replace("/hr", "")
        )
        price = int(price_text)
        rating = float(rating_tag.get_text())
        review_count = int(re.search(r'\d+', reviews_tag.get_text()).group())

        teachers.append((link, name, price, rating, review_count))

    return sorted(teachers, key=orden_ratings, reverse=True)
# Sort key: rating weighted by the number of reviews.
def orden_ratings(teacher):
    """Return the weighted score used to rank a teacher tuple.

    The score is the rating (index 3) multiplied by the natural logarithm
    of one plus the review count (index 4), so a teacher with zero reviews
    scores 0 regardless of rating.

    Args:
        teacher: Sequence whose element 3 is the rating and element 4 is the
            number of reviews.

    Returns:
        The weighted score as a float.
    """
    rating = teacher[3]
    review_count = teacher[4]
    weight = math.log(1 + review_count)
    return rating * weight
# Given a URL, load it in a browser and return its parsed teacher list.
def get_list_from_link(url, *, wait_seconds=10):
    """Load *url* in Chrome and return the teacher list parsed from the page.

    A new Chrome WebDriver session is started, the page is opened, and after
    a fixed pause the page source is handed to ``get_teacher_list``. The
    browser session is always closed, even if loading the page fails.

    Args:
        url: Address of the page to load.
        wait_seconds: Seconds to sleep after requesting the page before its
            source is read. Defaults to 10, the previously hard-coded value.

    Returns:
        Whatever ``get_teacher_list`` returns for the page source.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Fixed pause before reading the source; presumably gives dynamically
        # rendered content time to appear -- TODO confirm.
        time.sleep(wait_seconds)
        html = driver.page_source
    finally:
        # Always shut the browser down so a failed load does not leak it.
        driver.quit()
    return get_teacher_list(html)