Source code for DnD_5e.read_from_web

import urllib.request
import re
from bs4 import BeautifulSoup
from DnD_5e.utility_methods_dnd import ability_from_abbreviation

# Non-breaking space character; action text read from the page has it replaced by a plain space
NON_BREAK_SPACE = '\xa0'

# First run of decimal digits in a string (used for AC, HP, ability scores and challenge rating)
DIGIT_PATTERN = re.compile(r"(\d+)")

# Header line of a monster entry, matched against lowercased text.
# Groups: 1 = size, 2 = race (optionally with a parenthesised subtype), 3 = alignment
SIZE_RACE_ALIGNMENT_PATTERN = re.compile(r"(\w+) "  # size
                                         r"(\w+(?: \(\w+(?:, \w+)?\))?), "  # race
                                         r"((?:(?:lawful)|(?:neutral)|(?:chaotic)) (?:(?:good)|(?:neutral)|(?:evil))"  # alignment
                                         r"|(?:neutral)|(?:unaligned)|(?:any(?:.*)? alignment))")
# Groups: 1 = base speed, 2 = climb, 3 = fly, 4 = swim (each optional one is None when absent).
# The optional speeds are only recognised in exactly this order.
SPEED_PATTERN = re.compile(r"(\d+) ft\.?(?:, climb (\d+) ft\.?)?(?:, fly (\d+) ft\.?)?(?:, swim (\d+) ft\.?)?")

# multiattack: multiple uses of a single attack
# Groups: 1 = number word, 2 = attack name
MA_SINGLE_PATTERN = re.compile(r"makes ([^ ]*?) ([^ ]*?) attacks\.")
# multiattack: different attacks
# Groups: 1 = total (number word); (2, 3), (4, 5), (6, 7) = (number word, attack name) pairs;
# (8, 9) = optional "or" alternative (number word may be missing);
# 10-13 = optional replacement clause: make <10> <11> attack in place of <12> <13> attacks
MA_MIX_PATTERN = re.compile(r"makes ([^ ]+).*? attacks: ([^ ]+) with its ([^ ]+)"
                            r"(?:, ([^ ]+) with its ([^ ]+),)*? and ([^ ]+) (?:(?:with its)|(?:to)) (.*?)"
                            r"(?: or (?:([^ ]+) with its )?(.*?))?\."
                            r"(?: It can make ([^ ]+) ([^ ]+) attack in place of its ([^ ]+) ([^ ]+) attacks)?")
# multiattack: repeat one of several attacks multiple times
# Groups: 1 = total (number word), 2 = first attack, 3 = second attack,
# 4 = optional number of attacks that may be replaced, 5 = optional replacement attack
MA_EITHER_PATTERN = re.compile(r"makes ([^ ]*?) attacks, either with its ([^ ]*?) or its ([^ ]*?)\.(?: It can replace ([^ ]*?) of those attacks with a ([^ ]*?) attack)?")

# for multiattacks like the one Medusa makes
# Matched against lowercased text. Groups: 1 = total of the first option, (2, 3) and (4, 5) =
# (number word, attack name) pairs of the first option, 6 = total of the second option, 7 = its attack name
MA_COMPLEX_PATTERN = re.compile(r"makes either ([a-z]+) .* attacks \– ([a-z]+) with its (.*) and ([a-z]+) with its (.*) \– or ([a-z]+) .* attacks with its (.*)\.")

# attack that can be melee or ranged
# Groups: 1 = to-hit bonus (digits only), 2 = reach, 3 = normal range, 4 = long range
MELEE_OR_RANGED_PATTERN = re.compile(r"Melee or Ranged Weapon Attack: \+(\d+) to hit, reach (\d+) ft\.,? or range (\d+)\/(\d+) ft\.", re.IGNORECASE)

# a melee attack that has reach greater than 5ft or a ranged attack
# Groups: 1 = "reach", 2 = reach distance, 3 = "range", 4 = normal range, 5 = long range.
# NOTE: both halves are optional, so search() always succeeds; the groups are only filled
# when the searched text starts with "reach ..." or "range ...".
REACH_RANGE_PATTERN = re.compile(r"(?:(reach) (\d+))?(?:(range) (\d+)\/(\d+))?")  # pylint: disable=anomalous-backslash-in-string

# One damage roll such as "7 (2d6) piercing damage" or "7 (1d8 + 3) slashing damage" (lowercase).
# Groups: 1 = number of dice, 2 = die size, 3 = optional modifier, 4 = damage type
DAMAGE_PATTERN_STR = r"\d+ \((\d+)d(\d+)(?: \+ (\d+))?\) ([a-z]+) damage"
# One damage roll optionally followed by " plus " and a second roll (groups 5-8 mirror groups 1-4)
DAMAGE_PATTERN = re.compile(DAMAGE_PATTERN_STR + r"(?: plus " + DAMAGE_PATTERN_STR + ")?")

# Characters stripped from a page title before it is used as a generated class name
INVALID_NAME_STR = r"[ ,/'’()-]"
INVALID_NAME_PATTERN = re.compile(INVALID_NAME_STR)

# pattern for a single number of damage
# Groups: 1 = amount, 2 = damage type
SINGLE_DAMAGE_PATTERN = re.compile(r"(\d+) ([a-z]+) damage")

# Follow-up saving throw after a hit; matched against lowercased text.
# Groups: 1 = DC, 2 = save ability, 3 = damage dice (e.g. "2d6"), 4 = damage type, 5 = half-damage clause
SAVING_THROW_PATTERN = re.compile(r"the target must make a dc (\d+) ([a-z]+) saving throw, taking \d+ "
                                  r"\((\d+d\d+)\) ([a-z]+) damage on a failed save(, or half as much damage on a successful one)")

# Follow-up "save or die" effect; matched case-sensitively.
# Groups: 1 = hit point limit, 2 = DC, 3 = save ability
SAVEORDIE_PATTERN = re.compile(r"If the target is a creature that has (\d+) hit points or fewer, it must succeed on a DC (\d+) (\w+) saving throw or die\.")

# Number words indexed by their value, so NUMBER_WORDS.index("two") == 2
NUMBER_WORDS = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"]
# Fractional challenge ratings and their numeric value
FRACTION_MAP = {"1/8": 0.125, "1/4": 0.25, "1/2": 0.5}

# Spell header such as "3rd-level evocation (ritual)"; matched against lowercased text.
# Groups: 1 = level (one digit), 2 = school, 3 = "ritual" when present
LEVEL_SCHOOL_PATTERN = re.compile(r"(\d)(?:(?:st)|(?:nd)|(?:rd)|(?:th))-level ([a-z]+)(?: \((ritual)\))?")

# Cantrip header such as "evocation cantrip"; group 1 = school
CANTRIP_PATTERN = re.compile(r"([a-z]+) cantrip")

def get_attack_from_text(text):  # pylint: disable=too-many-nested-blocks
    """
    Get the information about an attack from a text description

    Two kinds of attack are recognised:

    * "Melee or Ranged Weapon Attack" - the result holds ready-made keyword-argument strings under
      ``melee_kwargs``, ``range_kwargs``, ``range_disadv_kwargs`` and, for two-handed use,
      ``versatile_kwargs``.
    * any other "Weapon Attack:" - the result holds ``attack_mod``, ``dice`` and, when found,
      ``melee_range`` / ``range`` / ``versatile_kwargs``. Text following the damage is either
      recognised as a known effect (``attack_type`` and ``extra_kwargs`` are set) or kept
      verbatim under ``more_content``.

    :param text: the text to read
    :return: the info about the attack found; an empty dict if the text is not a weapon attack;
        None if it is a weapon attack whose damage could not be parsed
    :raise ValueError: if a melee-or-ranged attack mentions two-handed use but its damage
        cannot be split into one-handed and two-handed parts
    """
    result = {}

    melee_ranged_match = MELEE_OR_RANGED_PATTERN.search(text)
    if melee_ranged_match:
        attack_mod, reach, normal_range, disadv_range = melee_ranged_match.groups()
        # Two layouts are handled:
        # [melee damage info] in melee or [range damage info] at range
        # [melee or ranged damage info], or [two-handed damage info] if used with two hands to make a melee attack
        damage_dice_versatile = None
        melee_idx = text.find("in melee")
        if melee_idx != -1:
            damage_dice_melee = get_damage_dice(text[:melee_idx])
            damage_dice_range = get_damage_dice(text[melee_idx:])
        elif text.find("with two hands") == -1:
            # same damage for melee or ranged
            damage_dice_melee = get_damage_dice(text)
            damage_dice_range = damage_dice_melee
        else:
            split_idx = text.find("damage, or")
            if split_idx == -1:
                raise ValueError("Don't know how to parse the damage for this melee/ranged attack")
            split_idx += 6  # keep the word "damage" in the first half so it can still be parsed
            damage_dice_melee = get_damage_dice(text[:split_idx])
            damage_dice_range = damage_dice_melee
            damage_dice_versatile = get_damage_dice(text[split_idx:])
        result["melee_kwargs"] = f"damage_dice={damage_dice_melee}, attack_mod={attack_mod}, melee_range={reach}"
        result["range_kwargs"] = f"damage_dice={damage_dice_range}, attack_mod={attack_mod}, range={normal_range}"
        result["range_disadv_kwargs"] = f"damage_dice={damage_dice_range}, attack_mod={attack_mod}, " \
                                        f"range={disadv_range}"
        if damage_dice_versatile:
            result["versatile_kwargs"] = f"damage_dice={damage_dice_versatile}, attack_mod={attack_mod}, " \
                                         f"melee_range={reach}"
        return result

    if "Weapon Attack:" not in text:
        return result

    to_hit, range_info = text.split(", ")[:2]  # TODO: care about number of targets
    to_hit = to_hit.split(" to hit")[0].split("Weapon Attack:")[1].strip()
    sign = to_hit[0]
    if sign == "+":
        to_hit = int(to_hit[1:])
    elif sign == "-":
        to_hit = int(to_hit[1:]) * -1
    result["attack_mod"] = to_hit

    range_match = REACH_RANGE_PATTERN.search(range_info)
    if range_match:
        if range_match.group(1):
            result["melee_range"] = range_match.group(2)
        if range_match.group(3):
            result["range"] = (range_match.group(4), range_match.group(5))

    if "Hit: The target must make" in text:
        # the hit deals no direct damage; everything after "Hit: " describes the effect
        result["dice"] = "dice.NullDamageDice()"
        result["more_content"] = text.split("Hit: ")[1]
    else:
        try:
            result["dice"] = get_damage_dice(text)
        except AttributeError:
            print(text)
            raise
        except ValueError as error:
            print('Attack with no damage', error)
            return None
        if "two hands" in text:
            damage_dice_versatile = get_damage_dice(text.split(", or")[1])
            # use the same keyword (damage_dice) as every other generated attack constructor call
            result["versatile_kwargs"] = f"damage_dice={damage_dice_versatile}, attack_mod={to_hit}, " \
                                         f"melee_range={range_match.group(2)}"
        and_idx = text.find("damage, and")
        end_idx = text.find("damage.")
        if and_idx != -1:
            result["more_content"] = text[and_idx + 12:]  # skip past "damage, and "
        elif end_idx != -1 and end_idx + 7 < len(text):
            result["more_content"] = text[end_idx + 7:]  # skip past "damage."

    if "more_content" in result:
        # check to see if this matches anything we know
        saveordie_match = SAVEORDIE_PATTERN.search(result["more_content"])
        if saveordie_match:
            result["attack_type"] = "SaveOrDie"
            result["extra_kwargs"] = f", threshold={int(saveordie_match.group(1))+1}, dc={saveordie_match.group(2)}, " \
                                     f"save_type='{saveordie_match.group(3).lower()}'"
            del result["more_content"]
        else:
            saving_throw_match = SAVING_THROW_PATTERN.search(result["more_content"].lower())
            if saving_throw_match:
                result["attack_type"] = "HitAndSaveAttack"
                result["extra_kwargs"] = f", dc={saving_throw_match.group(1)}, save_type='{saving_throw_match.group(2)}', " \
                                         f"save_damage_dice='{saving_throw_match.group(3)}', " \
                                         f"save_damage_type='{saving_throw_match.group(4)}', " \
                                         f"damage_on_success={bool(saving_throw_match.group(5))}"
                del result["more_content"]
    return result
def get_multiattacks_from_text(text):
    """
    Turn the description of a Multiattack action into MultiAttack constructor strings

    The text is tried against, in order: a single repeated attack, a mix of different attacks,
    an "either ... or ..." choice, and the Medusa-style two-option form. The first pattern that
    matches decides the result.

    Attack lists are formatted with ``repr``, so attack names appear in single quotes; names of
    the multiattacks themselves are written in double quotes so that they survive the
    single-quote stripping done when the strings are written out.

    :param text: the text of the Multiattack action
    :return: a list of strings, each one a Python assignment that builds a MultiAttack;
        empty if the text matches none of the known forms
    :raise ValueError: if a count is not one of the words in NUMBER_WORDS
    """
    multiattacks = []
    attack_list = []
    single_ma = MA_SINGLE_PATTERN.search(text)
    if single_ma:
        attack_list = [single_ma.group(2)] * NUMBER_WORDS.index(single_ma.group(1))
    else:
        mix_ma = MA_MIX_PATTERN.search(text)
        if mix_ma:
            attack_list, extra = _mix_multiattacks(mix_ma)
            multiattacks.extend(extra)
        else:
            ma_either_match = MA_EITHER_PATTERN.search(text)
            if ma_either_match:
                multiattacks.extend(_either_multiattacks(ma_either_match))
            else:
                ma_complex_match = MA_COMPLEX_PATTERN.search(text.lower())
                if ma_complex_match:
                    multiattacks.extend(_complex_multiattacks(ma_complex_match))
    if attack_list:
        multiattacks.append(f"multiattack = attack_class.MultiAttack(name=\"Multiattack\", attack_list={attack_list})")
    return multiattacks


def _mix_multiattacks(mix_ma):
    """Return (main attack list, alternate/replacement MultiAttack strings) for an MA_MIX_PATTERN match."""
    extra = []
    total_attack_num = NUMBER_WORDS.index(mix_ma.group(1))
    attack_list = []
    attack_num = 0
    sum_attack_num = 0
    idx = 2
    # Walk the (number word, attack name) group pairs until the announced total is reached.
    # NOTE: if the counts never add up to the total, group() eventually raises IndexError.
    while sum_attack_num < total_attack_num:
        try:
            attack_num = NUMBER_WORDS.index(mix_ma.group(idx))
        except ValueError:  # no match
            idx += 2
            continue
        attack_list.extend([mix_ma.group(idx + 1)] * attack_num)
        sum_attack_num += attack_num
        idx += 2

    alt_name = mix_ma.group(9)
    if alt_name:
        # if we had an "or", as in "one with its pike and one with its hooves or two with its longbow."
        alt_word = mix_ma.group(8)
        # no number specified, so use the same number as the previous attack
        alt_num = NUMBER_WORDS.index(alt_word) if alt_word else attack_num
        if alt_num < total_attack_num:
            # e.g., The sahuagin makes two melee attacks: one with its bite and one with its claws or spear.
            # the alternative takes the place of the last alt_num attacks
            attack_list2 = attack_list[:len(attack_list) - alt_num] + [alt_name] * alt_num
        else:  # assumption: alt_num == total_attack_num
            attack_list2 = [alt_name] * alt_num
        extra.append("multiattack_alt = attack_class.MultiAttack(name=\"Multiattack (alternate)\", "
                     f"attack_list={attack_list2})")

    if mix_ma.group(10):
        attack_list3 = attack_list[:]
        replaced = mix_ma.group(13)
        for _ in range(NUMBER_WORDS.index(mix_ma.group(12))):
            try:
                attack_list3.remove(replaced)
            except ValueError:
                # the list may hold the plural form, e.g. "claws" for "two claw attacks"
                attack_list3.remove(replaced + "s")
        attack_list3.extend([mix_ma.group(11)] * NUMBER_WORDS.index(mix_ma.group(10)))
        extra.append("multiattack_rep = attack_class.MultiAttack(name=\"Multiattack (replacement)\", "
                     f"attack_list={attack_list3})")
    return attack_list, extra


def _either_multiattacks(ma_either_match):
    """Return the MultiAttack strings for an MA_EITHER_PATTERN match (one string per choice)."""
    strings = []
    total_attack_num = NUMBER_WORDS.index(ma_either_match.group(1))
    ma_one = ma_either_match.group(2)
    ma_two = ma_either_match.group(3)
    ma_one_list = [ma_one] * total_attack_num
    ma_two_list = [ma_two] * total_attack_num
    strings.append(f"multiattack_{ma_one.lower()} = "
                   f"attack_class.MultiAttack(name=\"Multiattack ({ma_one})\", "
                   f"attack_list={ma_one_list})")
    strings.append(f"multiattack_{ma_two.lower()} = "
                   f"attack_class.MultiAttack(name=\"Multiattack ({ma_two})\", "
                   f"attack_list={ma_two_list})")
    if ma_either_match.group(4):
        ma_three = ma_either_match.group(5)
        ma_one_three = ma_one_list[:]
        ma_two_three = ma_two_list[:]
        for _ in range(NUMBER_WORDS.index(ma_either_match.group(4))):
            ma_one_three.remove(ma_one)
            ma_one_three.append(ma_three)
            ma_two_three.remove(ma_two)
            ma_two_three.append(ma_three)
        strings.append(f"multiattack_{ma_one.lower()}_{ma_three.lower()} = "
                       f"attack_class.MultiAttack(name=\"Multiattack ({ma_one} and {ma_three})\", "
                       f"attack_list={ma_one_three})")
        strings.append(f"multiattack_{ma_two.lower()}_{ma_three.lower()} = "
                       f"attack_class.MultiAttack(name=\"Multiattack ({ma_two} and {ma_three})\", "
                       f"attack_list={ma_two_three})")
    return strings


def _complex_multiattacks(ma_complex_match):
    """Return the two MultiAttack strings for an MA_COMPLEX_PATTERN match (one per option)."""
    one_1 = NUMBER_WORDS.index(ma_complex_match.group(2))
    one_1_type = ma_complex_match.group(3).replace(" ", "_")
    one_2 = NUMBER_WORDS.index(ma_complex_match.group(4))
    one_2_type = ma_complex_match.group(5).replace(" ", "_")
    list_1 = [one_1_type] * one_1 + [one_2_type] * one_2
    total_2 = NUMBER_WORDS.index(ma_complex_match.group(6))
    two_1_type = ma_complex_match.group(7).replace(" ", "_")
    return [
        f"multiattack_{one_1_type}_{one_2_type} = "
        f"attack_class.MultiAttack(name=\"Multiattack ({one_1_type} and {one_2_type})\", "
        f"attack_list={list_1})",
        f"multiattack_{two_1_type} = "
        f"attack_class.MultiAttack(name=\"Multiattack ({two_1_type})\", "
        f"attack_list={[two_1_type] * total_2})",
    ]
def get_damage_dice(text: str):
    """
    Get the string that corresponds to the Python code to create the correct DamageDice or DamageDiceBag
    for the given damage description

    The text is lowercased before matching. A flat amount with no dice (e.g., "1 piercing damage")
    is written as that many one-sided dice so that it always deals exactly that amount.

    :param text: text describing damage (e.g., "7 (2d6) piercing damage", optionally followed by
        " plus 3 (1d6) fire damage")
    :return: the code for a single dice.DamageDice, or for a dice.DamageDiceBag holding two
        DamageDice when the text has a "plus" part
    :raise ValueError: if no damage description can be found in the text
    """
    def dice_code(dice_num, dice_type, damage_mod, damage_type):
        # a missing "+ N" part means no modifier
        if damage_mod is None:
            damage_mod = 0
        return f"dice.DamageDice(dice_num={dice_num}, dice_type={dice_type}, modifier={damage_mod}, " \
               f"damage_type='{damage_type}')"

    lowered = text.lower()
    damage_match = DAMAGE_PATTERN.search(lowered)
    if not damage_match:
        single_damage_match = SINGLE_DAMAGE_PATTERN.search(lowered)
        if not single_damage_match:
            raise ValueError(f"Don't know what kind of damage this is: {text}")
        # flat damage: N one-sided dice always total N (N N-sided dice would not)
        return dice_code(single_damage_match.group(1), 1, 0, single_damage_match.group(2))

    first_dice = dice_code(*damage_match.group(1, 2, 3, 4))
    if damage_match.group(5):
        second_dice = dice_code(*damage_match.group(5, 6, 7, 8))
        return f"dice.DamageDiceBag(dice_list=[{first_dice}, {second_dice}])"
    return first_dice
def write_attack_multiattack(attack_stats, multiattacks, indent_size, outfile):
    """
    Write the code for a group of attacks, followed by their multiattacks

    Entries whose value is a string are written as plain descriptions (skipped if the description
    mentions "Multiattack"). Entries whose value is a dict are written as one constructor call per
    attack variant; each generated variable is named after the ``name`` it is given
    (``x``, ``x_versatile``, ``x_range``, ``x_range_disadvantage``).

    :param attack_stats: dict mapping attack name to either a description string or an attack
        info dict like the ones get_attack_from_text builds
    :param multiattacks: list of MultiAttack assignment strings; single quotes are removed from
        them on output, which turns the quoted attack names into variable references
    :param indent_size: number of spaces in one indentation level
    :param outfile: object with a ``write`` method to write to
    :return: None
    """
    indent = " " * 2 * indent_size
    # (key in the attack dict, suffix of the generated variable and name) for melee-or-ranged attacks
    prebuilt_variants = (("melee_kwargs", ""), ("versatile_kwargs", "_versatile"),
                         ("range_kwargs", "_range"), ("range_disadv_kwargs", "_range_disadvantage"))
    for attack_name, stats in attack_stats.items():
        var_attack_name = attack_name.lower().replace(" ", "_").replace("(", "").replace(")", "")
        if isinstance(stats, str):
            if "Multiattack" not in stats:
                outfile.write(indent + f'{attack_name}. {stats}\n')  # write out the description
            continue

        # we have an attack
        attack_type = stats.get("attack_type", "Attack")
        extra_kwargs = stats.get("extra_kwargs", "")
        for key, suffix in prebuilt_variants:
            if key in stats:
                outfile.write(indent + f"{var_attack_name}{suffix} = "
                                       f"attack_class.{attack_type}({stats[key]}, "
                                       f"name='{attack_name}{suffix}'{extra_kwargs})\n")
        if "melee_range" in stats:
            outfile.write(indent + f"{var_attack_name} = "
                                   f"attack_class.{attack_type}(damage_dice={stats['dice']}, "
                                   f"attack_mod={stats['attack_mod']}, "
                                   f"melee_range={stats['melee_range']}, "
                                   f"name='{attack_name}'{extra_kwargs})\n")
        if "range" in stats:
            outfile.write(indent + f"{var_attack_name}_range = "
                                   f"attack_class.{attack_type}(damage_dice={stats['dice']}, "
                                   f"attack_mod={stats['attack_mod']}, "
                                   f"range={stats['range'][0]}, name='{attack_name}_range'{extra_kwargs})\n")
            # variable name matches the name= argument and the melee-or-ranged variant above
            outfile.write(indent + f"{var_attack_name}_range_disadvantage = "
                                   f"attack_class.{attack_type}(damage_dice={stats['dice']}, "
                                   f"attack_mod={stats['attack_mod']}, "
                                   f"range={stats['range'][1]}, "
                                   f"name='{attack_name}_range_disadvantage'{extra_kwargs})\n")
        more_content = stats.get("more_content")
        if more_content:
            # unparsed rules text is kept as a comment, one level deeper than the attack
            outfile.write(f"{' ' * 3 * indent_size}# {more_content}\n")

    for attack_str in multiattacks:
        outfile.write(indent + attack_str.replace("'", "") + "\n")
def read_dnd5einfo_monster_url(url, outname, indent_size=4):
    """
    Download a monster page and append the class generated from it to a file

    :param url: the address of the monster page
    :param outname: the name of the file to append to
    :param indent_size: number of spaces in one indentation level of the generated code
    :return: None
    """
    # the request carries a browser-like User-Agent header
    page_req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # close the connection as soon as the page has been read
    with urllib.request.urlopen(page_req) as response:
        webpage = response.read()
    soup = BeautifulSoup(webpage, 'lxml')
    read_dnd5einfo_monster_soup(soup, outname, indent_size)
def read_dnd5einfo_monster_soup(soup, outname, indent_size=4):
    """
    Append a ``combatant.Creature`` subclass generated from a parsed monster page to a file

    The generated ``__init__`` sets ``default_kwargs`` from the stat block. Features, actions,
    legendary actions and reactions are written inside string literals in the generated code,
    with the parsed attacks written as constructor calls.

    :param soup: the parsed page (a BeautifulSoup document)
    :param outname: the name of the file to append to
    :param indent_size: number of spaces in one indentation level of the generated code
    :return: None
    :raise ValueError: if an unexpected h4 heading is found among the features or actions
    """
    # the with block closes the file even when parsing fails part-way through
    with open(outname, "a", encoding='utf8') as outfile:
        _write_monster_class(soup, outfile, indent_size)


def _split_damage_types(raw):
    """Split a resistance/immunity/vulnerability listing into its individual entries."""
    listing = str(raw).replace(";", ",")
    listing = re.sub(r"and (\w+) from nonmagical weapons", r"\1", listing)
    return listing.split(", ")


def _append_to_previous(stats, attack_name, text):
    """Attach a paragraph that has no heading of its own to the entry parsed just before it."""
    previous = stats[attack_name]
    if isinstance(previous, str):
        stats[attack_name] = previous + " " + text
    elif "more_content" in previous:
        previous["more_content"] += text
    else:
        previous["more_content"] = text


def _write_monster_class(soup, outfile, indent_size):  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    """Parse the monster page in soup and write the generated class to the open file outfile."""
    indent2 = " " * 2 * indent_size
    indent3 = " " * 3 * indent_size

    soup = soup.body
    name = re.sub(INVALID_NAME_PATTERN, "", soup.div.h1.get_text())  # remove unwanted characters from name
    outfile.write(f"class {name}(combatant.Creature):\n")
    outfile.write(" " * indent_size + "def __init__(self, **kwargs):\n")
    outfile.write(indent2 + "default_kwargs = {")
    content = soup.find("div", class_="pad group").article.div
    paragraphs = content.find_all('p')

    # get size and race
    size_race_alignment = paragraphs[0].get_text().lower()
    size_race_alignment_match = SIZE_RACE_ALIGNMENT_PATTERN.search(size_race_alignment)
    size = size_race_alignment_match.group(1).lower()
    race = size_race_alignment_match.group(2)
    outfile.write(f'"size": "{size}", "creature_type": "{race}", ')

    # get ac, hp, and speed
    ac_hp_speed = paragraphs[1]
    ac, hp, speed = [x.next_sibling for x in ac_hp_speed.find_all("strong")]
    ac = DIGIT_PATTERN.search(ac).group(1)
    hp = DIGIT_PATTERN.search(hp).group(1)
    speed_match = SPEED_PATTERN.search(speed)
    speed, climb_speed, fly_speed, swim_speed = speed_match.groups()
    outfile.write(f"'ac': {ac}, 'max_hp': {hp}, 'speed': {speed}, ")
    if climb_speed:
        outfile.write(f"'climb_speed': {climb_speed}, ")
    if fly_speed:
        outfile.write(f"'fly_speed': {fly_speed}, ")
    if swim_speed:
        outfile.write(f"'swim_speed': {swim_speed}, ")
    outfile.write("\n")

    # get attributes
    attribute_table = soup.table.find_all("tr")[1]
    strength, dexterity, constitution, intelligence, wisdom, charisma = [
        DIGIT_PATTERN.search(x.get_text()).group(1) for x in attribute_table.find_all("td")]
    outfile.write(indent3 + f'"strength": {strength}, "dexterity": {dexterity}, "constitution": {constitution}, '
                            f'"intelligence": {intelligence}, "wisdom": {wisdom}, "charisma": {charisma}}}\n')

    # get saving throws, skills, senses, resistances, immunities, vulnerabilities, and cr
    details = paragraphs[2]
    proficiencies = set()
    resistances = set()
    immunities = set()
    vulnerabilities = set()
    vision = "normal"
    cr = 0
    for element in details.find_all("strong"):
        label = element.get_text().lower()
        if label == "saving throws":
            for str_value in str(element.next_sibling).split(", "):
                ability, _mod = str_value.split()
                proficiencies.add(ability_from_abbreviation(ability.lower()))
        elif "resistances" in label:
            resistances.update(_split_damage_types(element.next_sibling))
        elif "immunities" in label:
            immunities.update(_split_damage_types(element.next_sibling))
        elif "vulnerabilities" in label:
            vulnerabilities.update(_split_damage_types(element.next_sibling))
        elif label == "senses":
            senses = element.next_sibling
            if "truesight" in senses:
                vision = "truesight"
            elif "blindsight" in senses:
                vision = "blindsight"
            elif "darkvision" in senses:
                vision = "darkvision"
        elif label == "challenge":
            # take the whole rating, including a fractional one such as "1/4"
            cr_str = re.search(r"\d+(?:/\d+)?", str(element.next_sibling)).group(0)
            try:
                cr = int(cr_str)
            except ValueError:
                cr = FRACTION_MAP[cr_str]

    if proficiencies:
        outfile.write(indent2 + f'default_kwargs.update({{"proficiencies": {proficiencies}}})\n')
    if resistances:
        outfile.write(indent2 + f'default_kwargs.update({{"resistances": {resistances}}})\n')
    if immunities:
        outfile.write(indent2 + f'default_kwargs.update({{"immunities": {immunities}}})\n')
    if vulnerabilities:
        outfile.write(indent2 + f'default_kwargs.update({{"vulnerabilities": {vulnerabilities}}})\n')
    outfile.write(indent2 + f"default_kwargs.update({{'vision': \"{vision}\", 'cr': {cr}}})\n")

    # features: every paragraph up to the "Actions" heading, kept as text
    outfile.write(indent2 + "# Features\n")
    outfile.write(indent2 + '"""\n')
    for sibling in details.next_siblings:
        if sibling.name == 'p':
            outfile.write(indent2 + sibling.get_text() + "\n")
        elif sibling.name == 'h4':
            if sibling.get_text() == "Actions":
                outfile.write(indent2 + '"""\n')
                outfile.write(indent2 + "# Actions\n")
                outfile.write(indent2 + '"""\n')
                break
            raise ValueError("unknown h4")

    attack_stats = {}
    legendary_attack_stats = {}
    multiattacks = []
    legendary_multiattacks = []
    legendary = False
    skip_next = False
    reactions = False
    # actions start after the node the features loop stopped on
    for action in sibling.next_siblings:  # pylint: disable=undefined-loop-variable
        if skip_next:
            # the node right after the "Legendary Actions" heading is not parsed
            skip_next = False
            continue
        if action.name == 'p':
            if legendary:
                my_dict = legendary_attack_stats
                multi = legendary_multiattacks
            else:
                my_dict = attack_stats
                multi = multiattacks
            try:
                name_element = action.em
                attack_name = name_element.get_text().replace(".", "")
                text = str(name_element.next_sibling).strip().replace(NON_BREAK_SPACE, ' ')
                alt_text = action.get_text().replace(name_element.get_text(), "").strip()
                # prefer the whole paragraph when the text right after the name is only part of it
                if not text or len(alt_text) > len(text):
                    text = alt_text.replace(NON_BREAK_SPACE, ' ')
            except AttributeError:
                # no <em> heading: this paragraph continues the previous attack
                text = action.get_text().strip().replace(NON_BREAK_SPACE, ' ')
                _append_to_previous(my_dict, attack_name, text)
                continue
            if "Multiattack" in attack_name:
                multi.extend(get_multiattacks_from_text(text))
            else:
                # if the result was empty (i.e., we found something that could not be parsed as
                # an attack), just give the description
                my_dict[attack_name] = get_attack_from_text(text) or text
        elif action.name == 'h4':
            if action.get_text() == "Legendary Actions":
                legendary = True
                skip_next = True
            elif action.get_text() == "Reactions":
                reactions = True
                break
            else:
                raise ValueError("unknown h4")
        elif action.name == "div":
            break

    write_attack_multiattack(attack_stats, multiattacks, indent_size, outfile)
    outfile.write(indent2 + '"""\n')
    if legendary:
        outfile.write(indent2 + "# Legendary Actions\n")
        outfile.write(indent2 + '"""\n')
        write_attack_multiattack(legendary_attack_stats, legendary_multiattacks, indent_size, outfile)
        outfile.write(indent2 + '"""\n')

    if reactions:
        reaction_stats = {}
        reaction_multiattacks = []
        outfile.write(indent2 + "# Reactions\n")
        outfile.write(indent2 + '"""\n')
        # reactions start after the "Reactions" heading the actions loop stopped on
        for sibling in action.next_siblings:  # pylint: disable=undefined-loop-variable
            try:
                name_element = sibling.em
                attack_name = name_element.get_text().replace(".", "")
                text = str(name_element.next_sibling).strip()
                if not text:
                    text = sibling.get_text().replace(name_element.get_text(), "").strip()
            except AttributeError:
                text = str(sibling).strip()
                if not text:
                    continue  # whitespace between tags carries no content
                # no <em> heading: this node continues the previous reaction
                _append_to_previous(reaction_stats, attack_name, text)
                continue
            if "Multiattack" in attack_name:
                reaction_multiattacks.extend(get_multiattacks_from_text(text))
            else:
                # fall back to the plain description when the text is not a parseable attack
                reaction_stats[attack_name] = get_attack_from_text(text) or text
        write_attack_multiattack(reaction_stats, reaction_multiattacks, indent_size, outfile)
        outfile.write(indent2 + '"""\n')

    outfile.write(indent2 + "kwargs.update(default_kwargs)\n")
    outfile.write(indent2 + "super().__init__(**kwargs)\n")
    outfile.write("\n")
def read_dnd5einfo_spell_soup(soup, outname, indent_size=4):
    """
    Append an ``attack_class.Spell`` subclass generated from a parsed spell page to a file

    The generated ``__init__`` sets ``default_kwargs`` (level, school, ritual flag, casting time,
    range, duration and components); the description paragraphs are written inside a string
    literal in the generated code.

    :param soup: the parsed page (a BeautifulSoup document)
    :param outname: the name of the file to append to
    :param indent_size: number of spaces in one indentation level of the generated code
    :return: None
    """
    indent2 = " " * indent_size * 2
    # UTF-8 like the monster writer; the with block closes the file even if parsing fails
    with open(outname, 'a', encoding='utf8') as f_out:
        soup = soup.body
        name = re.sub(INVALID_NAME_PATTERN, "", soup.div.h1.get_text())  # remove unwanted characters from name
        f_out.write(f"class {name}(attack_class.Spell):\n")
        f_out.write(" " * indent_size + "def __init__(self, **kwargs):\n")
        content = soup.find("div", class_="pad group").article.div
        paragraphs = content.find_all('p')

        # level and school, e.g. "3rd-level evocation (ritual)" or "evocation cantrip"
        level_school_text = paragraphs[0].get_text().lower()
        level_school_match = LEVEL_SCHOOL_PATTERN.search(level_school_text)
        ritual = False
        if level_school_match:
            level = level_school_match.group(1)
            school = level_school_match.group(2)
            ritual = bool(level_school_match.group(3))
        else:
            # not a levelled spell, so it is read as a cantrip (level 0)
            cantrip_match = CANTRIP_PATTERN.search(level_school_text)
            level = 0
            school = cantrip_match.group(1)
        ritual_str = ", 'ritual': True" if ritual else ""
        f_out.write(indent2 + f"default_kwargs = {{'level': {level}, 'school': '{school}'{ritual_str}}}\n")

        # each value follows its <strong> label; the first two characters are dropped
        casting, range_, components, duration = paragraphs[2].find_all("strong")
        casting = str(casting.next_sibling)[2:]  # e.g., ": 1 action" becomes "1 action"
        range_ = str(range_.next_sibling)[2:].split()[0]
        if not range_.isdigit():
            range_ = f"'{range_.lower()}'"  # a non-numeric range is written as a quoted string
        components = str(components.next_sibling)[2:]
        components = [c[0].lower() for c in components.split(", ")]  # keep only the initial letter
        duration = str(duration.next_sibling)[2:].lower()
        f_out.write(indent2 + f"default_kwargs.update({{'casting': '{casting}', 'range': {range_}, "
                              f"'duration': '{duration}', 'components': {components}}})\n")

        f_out.write(indent2 + '# Description\n')
        f_out.write(indent2 + '"""\n')
        for paragraph in paragraphs[3:]:
            text = paragraph.get_text()
            if text.startswith("Material"):
                continue
            f_out.write(indent2 + text + "\n")
        f_out.write(indent2 + '"""\n')
        f_out.write(indent2 + "kwargs.update(default_kwargs)\n")
        f_out.write(indent2 + "super().__init__(**kwargs)\n")
        f_out.write("\n")
def read_dnd5einfo_spell_url(url, outname, indent_size=4):
    """
    Download a spell page and append the class generated from it to a file

    :param url: the address of the spell page
    :param outname: the name of the file to append to
    :param indent_size: number of spaces in one indentation level of the generated code
    :return: None
    """
    # the request carries a browser-like User-Agent header
    page_req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # close the connection as soon as the page has been read
    with urllib.request.urlopen(page_req) as response:
        webpage = response.read()
    soup = BeautifulSoup(webpage, 'lxml')
    read_dnd5einfo_spell_soup(soup, outname, indent_size)
def read_dnd5einfo_spell_page(outname, indent_size=4, first_url=None):
    """
    Read every spell linked from the dnd5e.info spell index and append the generated classes to a file

    :param outname: the name of the file to append to
    :param indent_size: number of spaces in one indentation level of the generated code
    :param first_url: if given, spells listed before this address are skipped; the address must
        equal the link in the index exactly, otherwise nothing is read
    :return: None
    """
    index_req = urllib.request.Request("https://dnd5e.info/spellcasting/spell/",
                                       headers={'User-Agent': 'Mozilla/5.0'})
    # parse inside the with block so the connection is closed once the index has been read
    with urllib.request.urlopen(index_req) as response:
        soup = BeautifulSoup(response, 'lxml')
    soup = soup.body.div.find("div", id="page").div.div.div.section.find("div", class_='pad group').article.div.ul
    found_first_url = not first_url
    for spell_element in soup.find_all("li"):
        url = str(spell_element.a['href'])
        if not found_first_url:
            if url != first_url:
                continue
            found_first_url = True
        print(url)
        read_dnd5einfo_spell_url(url, outname, indent_size)
def read_dnd5einfo_monster_page(outname, indent_size=4, start_url=None, stop_url=None):
    """
    Read the monsters linked from the dnd5e.info monster index and append the generated classes to a file

    :param outname: the name of the file to append to
    :param indent_size: number of spaces in one indentation level of the generated code
    :param start_url: if given, monsters listed before this address are skipped; the address must
        equal the link in the index exactly, otherwise nothing is read
    :param stop_url: if given, reading stops before this address (it is not read itself);
        it is only looked for after the start has been found
    :return: None
    """
    index_req = urllib.request.Request("https://dnd5e.info/monsters/monster/",
                                       headers={'User-Agent': 'Mozilla/5.0'})
    # parse inside the with block so the connection is closed once the index has been read
    with urllib.request.urlopen(index_req) as response:
        soup = BeautifulSoup(response, 'lxml')
    soup = soup.body.div.find("div", id="page").div.div.div.section.find("div", class_='pad group').article.div.ul
    found_first_url = not start_url
    for monster_element in soup.find_all("li"):
        url = str(monster_element.a['href'])
        if not found_first_url:
            if url != start_url:
                continue
            found_first_url = True  # the start entry itself is read, even if it equals stop_url
        elif stop_url == url:
            break
        print(url)
        read_dnd5einfo_monster_url(url, outname, indent_size)
if __name__ == "__main__":  # pragma: no cover
    # Running the module as a script appends the zombie stat block to the bestiary package
    read_dnd5einfo_monster_url('https://dnd5e.info/monsters/monster/zombie', '../bestiary/__init__.py')
    # Other invocations kept for reference.
    # NOTE(review): the last three call functions that are not defined in the visible source - confirm they exist
    # read_dnd5einfo_monster_soup(BeautifulSoup(open("Satyr.html"), 'lxml'), '../bestiary/__init__.py')
    # read_dnd5einfo_monster_page('../bestiary/__init__.py', start_url="https://dnd5e.info/monsters/monster/dragon-silver-wyrmling/",
    #                             stop_url="https://dnd5e.info/monsters/monster/drider/")
    # read_dnd5einfo_spell_page('../spell_list/__init__.py', first_url="https://dnd5e.info/spellcasting/spell/blur/")
    # read_dnd5einfo_spell_url("https://dnd5e.info/spellcasting/spell/black-tentacles/", '../spell_list/__init__.py')
    # split_d20htm_read("Skeletons.html", 'h2')
    # read_from_d20srd("http://5e.d20srd.org/srd/monsters/ghost.htm", "../bestiary/__init__.py")
    # read_from_d20srdhtml(BeautifulSoup(open("Warhorse Skeleton"), "lxml"), "../bestiary/__init__.py")