From a812c2526968bd6e0d4ea42640bab9947446f99a Mon Sep 17 00:00:00 2001
From: Adrian Amaglio
Date: Sun, 12 Sep 2021 15:05:41 +0200
Subject: [PATCH] init

---
 main.py  | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 urls.txt |  1 +
 2 files changed, 74 insertions(+)
 create mode 100644 main.py
 create mode 100644 urls.txt

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..1038419
--- /dev/null
+++ b/main.py
@@ -0,0 +1,73 @@
+from bs4 import BeautifulSoup
+import urllib.request
+import time
+
+# Total ingredient counts, keyed by ingredient name.
+items = {}
+def get_ingredients(text):
+    # Parse the recipe table of an item page: {ingredient href: quantity}.
+    ingredients = {}
+    soup = BeautifulSoup(text, 'lxml')
+
+    table = soup.find_all('table', {'class': 'add_stat_table'})[0]
+    for child in table.children:
+        a = tuple(tuple(child.children)[0].children)[0]
+        # The quantity is written as "( xN )" after the ingredient link.
+        ingredients[a.attrs['href']] = int(tuple(a.children)[1].text.split('( x')[-1][:-2])
+
+    return ingredients
+
+def get_ingredients2(text):
+    # Parse an ingredient page: {ingredient name: quantity}, or {} for base materials.
+    ingredients = {}
+    soup = BeautifulSoup(text, 'lxml')
+
+    table = soup.find_all('table', {'class': 'add_stat_table'})[0]
+    key = ''
+    for child in tuple(tuple(table.children)[0].children)[0].children:
+        if child.has_attr('href'):
+            key = child.text
+            ingredients[key] = None
+        elif child.text.startswith('x'):
+            ingredients[key] = int(child.text[2:])
+    # Skip trees and rocks: a missing quantity means this is not a recipe table.
+    for i in ingredients:
+        if ingredients[i] is None:
+            return {}
+    return ingredients
+
+def request(url):
+    # Fetch a page with a 1 s delay and a browser-like User-Agent.
+    time.sleep(1)
+    r = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
+    r = urllib.request.urlopen(r)
+    return r.read()
+
+def recetter_un_objet(url):
+    # Resolve the recipe at `url` and add its ingredient counts to `items`.
+    hostname = '/'.join(url.split('/')[:3])
+    try:
+        ingredients = get_ingredients(request(url))
+    except Exception as e:
+        print('Error parsing', url)
+        raise e
+
+    for ingredient in ingredients:
+        try:
+            i2 = get_ingredients2(request(hostname + ingredient))
+        except Exception as e:
+            print('Error parsing', hostname + ingredient)
+            raise e
+        for i in i2:
+            if i not in items:
+                items[i] = 0
+            items[i] += i2[i] * ingredients[ingredient]
+
+if __name__ == '__main__':
+    print('Recipe counter. Works with https://genshin.honeyhunterworld.com as of September 2021. Reading urls.txt')
+    with open('urls.txt', 'r') as f:
+        for line in f:
+            line = line.strip()
+            print('parsing', line)
+            recetter_un_objet(line)
+    print(items)
diff --git a/urls.txt b/urls.txt
new file mode 100644
index 0000000..c294e1e
--- /dev/null
+++ b/urls.txt
@@ -0,0 +1 @@
+https://genshin.honeyhunterworld.com/db/item/hs_59/?lang=FR