"""Recipe counter: total the sub-ingredients needed for a list of item pages.

Reads item page URLs from ``urls.txt`` (one per line), fetches each page and
the page of every ingredient it links to, and accumulates the required
quantities in the module-level ``items`` dict, which is printed at the end.

The parsing relies on the HTML layout of https://genshin.honeyhunterworld.com
as of September 2021 (per the banner printed at start-up).
"""
import time
import urllib.request

from bs4 import BeautifulSoup

# Running totals across every processed URL: ingredient name -> quantity.
items = {}


def _find_stat_table(text):
    """Return the first ``<table class="add_stat_table">`` found in *text*.

    Raises IndexError when the markup contains no such table.
    """
    soup = BeautifulSoup(text, 'lxml')
    return soup.find_all('table', {'class': 'add_stat_table'})[0]


def _first_child(node):
    """Return the first child of *node* (IndexError if it has none)."""
    return tuple(node.children)[0]


def get_ingredients(text):
    """Parse an item page and return its direct ingredients.

    Args:
        text: HTML markup of the item page.

    Returns:
        A dict mapping each ingredient link's ``href`` attribute to its
        integer quantity.

    Raises:
        IndexError, KeyError, AttributeError, ValueError: when the markup does
            not have the expected structure.
    """
    ingredients = {}
    for row in _find_stat_table(text).children:
        # Each table child is expected to nest the ingredient link two levels
        # down (presumably row -> cell -> <a>; verify against the live site).
        link = _first_child(_first_child(row))
        # The link's second child holds text ending in "( x<N>" followed by two
        # trailing characters; keep what follows the last "( x" and drop them.
        label = tuple(link.children)[1].text
        ingredients[link.attrs['href']] = int(label.split('( x')[-1][:-2])
    return ingredients


def get_ingredients2(text):
    """Parse an ingredient page and return the sub-ingredients it lists.

    Args:
        text: HTML markup of the ingredient page.

    Returns:
        A dict mapping each sub-ingredient's link text to its integer
        quantity, or an empty dict if any listed entry has no quantity.

    Raises:
        IndexError, AttributeError, ValueError: when the markup does not have
            the expected structure.
    """
    ingredients = {}
    table = _find_stat_table(text)
    key = ''
    for child in _first_child(_first_child(table)).children:
        if child.has_attr('href'):
            # A link starts a new entry; its quantity is not known yet.
            key = child.text
            ingredients[key] = None
        elif child.text.startswith('x'):
            # Text of the form "x<sep><N>": the quantity of the latest link.
            ingredients[key] = int(child.text[2:])
    # Skip trees and rocks: an entry without a quantity discards the page.
    if any(quantity is None for quantity in ingredients.values()):
        return {}
    return ingredients


def request(url):
    """Fetch *url* and return the raw response body as bytes.

    Sleeps one second before every request and sends a browser-like
    ``User-Agent`` header.

    Raises:
        ValueError: if *url* is not a valid URL (e.g. an empty string).
        urllib.error.URLError: if the request fails.
    """
    time.sleep(1)
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the connection once the body has been read.
    with urllib.request.urlopen(req) as response:
        return response.read()


def recetter_un_objet(url):
    """Add everything needed to craft the item at *url* to ``items``.

    Fetches the item page, then the page of each ingredient it links to, and
    adds ``sub-ingredient quantity * ingredient quantity`` to the global
    ``items`` totals.

    Args:
        url: Absolute URL of the item page; its scheme and host are reused to
            resolve the ingredient links.

    Raises:
        Exception: any fetch or parse error is re-raised after the URL that
            failed has been printed.
    """
    # "scheme://host" is the first three '/'-separated parts of the URL.
    hostname = '/'.join(url.split('/')[:3])
    try:
        ingredients = get_ingredients(request(url))
    except Exception:
        print('Error parsing ', url)
        raise
    for href, quantity in ingredients.items():
        ingredient_url = hostname + href
        try:
            sub_ingredients = get_ingredients2(request(ingredient_url))
        except Exception:
            # Report the page that actually failed, not the parent recipe.
            print('Error parsing ', ingredient_url)
            raise
        for name, sub_quantity in sub_ingredients.items():
            items[name] = items.get(name, 0) + sub_quantity * quantity


def main():
    """Process every URL listed in ``urls.txt`` and print the totals."""
    print('Compteur de recette. Fonctionne avec https://genshin.honeyhunterworld.com en septembre 2021. On lit le fichier urls.txt')
    with open('urls.txt', 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line is not a URL and would abort the whole run.
                continue
            print('parsing ', line)
            recetter_un_objet(line)
    print(items)


if __name__ == '__main__':
    main()