import urllib.request
import re
from bs4 import BeautifulSoup
from DnD_5e.utility_methods_dnd import ability_from_abbreviation
# Non-breaking space character; the parsers below replace it with a plain space.
NON_BREAK_SPACE = '\xa0'
# First run of digits in a string.
DIGIT_PATTERN = re.compile(r"(\d+)")
# "<size> <race>, <alignment>" line; groups: size, race (with optional parenthesised tags), alignment.
SIZE_RACE_ALIGNMENT_PATTERN = re.compile(r"(\w+) "  # size
                                         r"(\w+(?: \(\w+(?:, \w+)?\))?), "  # race
                                         r"((?:(?:lawful)|(?:neutral)|(?:chaotic)) (?:(?:good)|(?:neutral)|(?:evil))"  # alignment
                                         r"|(?:neutral)|(?:unaligned)|(?:any(?:.*)? alignment))")
# Groups: walking speed, then optional climb, fly and swim speeds (in that order).
SPEED_PATTERN = re.compile(r"(\d+) ft\.?(?:, climb (\d+) ft\.?)?(?:, fly (\d+) ft\.?)?(?:, swim (\d+) ft\.?)?")
# multiattack: multiple uses of a single attack
MA_SINGLE_PATTERN = re.compile(r"makes ([^ ]*?) ([^ ]*?) attacks\.")
# multiattack: different attacks
# Groups: 1 total; (2, 3), (4, 5), (6, 7) number/attack pairs; (8, 9) the "or" alternative;
# 10-13 the "It can make N X attack in place of its M Y attacks" replacement.
MA_MIX_PATTERN = re.compile(r"makes ([^ ]+).*? attacks: ([^ ]+) with its ([^ ]+)"
                            r"(?:, ([^ ]+) with its ([^ ]+),)*? and ([^ ]+) (?:(?:with its)|(?:to)) (.*?)"
                            r"(?: or (?:([^ ]+) with its )?(.*?))?\."
                            r"(?: It can make ([^ ]+) ([^ ]+) attack in place of its ([^ ]+) ([^ ]+) attacks)?")
# multiattack: repeat one of several attacks multiple times
MA_EITHER_PATTERN = re.compile(r"makes ([^ ]*?) attacks, either with its ([^ ]*?) or its ([^ ]*?)\.(?: It can replace ([^ ]*?) of those attacks with a ([^ ]*?) attack)?")
# for multiattacks like the one Medusa makes
MA_COMPLEX_PATTERN = re.compile(r"makes either ([a-z]+) .* attacks \– ([a-z]+) with its (.*) and ([a-z]+) with its (.*) \– or ([a-z]+) .* attacks with its (.*)\.")
# attack that can be melee or ranged; groups: attack modifier, reach, normal range, long range
MELEE_OR_RANGED_PATTERN = re.compile(r"Melee or Ranged Weapon Attack: \+(\d+) to hit, reach (\d+) ft\.,? or range (\d+)\/(\d+) ft\.", re.IGNORECASE)
# a melee attack that has reach greater than 5ft or a ranged attack
# NOTE: both halves are optional, so this pattern always matches (possibly the empty string).
REACH_RANGE_PATTERN = re.compile(r"(?:(reach) (\d+))?(?:(range) (\d+)\/(\d+))?")  # pylint: disable=anomalous-backslash-in-string
# "<average> (<num>d<type>[ + <mod>]) <damage type> damage"
DAMAGE_PATTERN_STR = r"\d+ \((\d+)d(\d+)(?: \+ (\d+))?\) ([a-z]+) damage"
# One damage expression, optionally followed by " plus " and a second one (groups 5-8).
DAMAGE_PATTERN = re.compile(DAMAGE_PATTERN_STR + r"(?: plus " + DAMAGE_PATTERN_STR + ")?")
# Characters that are removed from a page title to build a class name.
INVALID_NAME_STR = r"[ ,/'’()-]"
INVALID_NAME_PATTERN = re.compile(INVALID_NAME_STR)
# pattern for a single number of damage
SINGLE_DAMAGE_PATTERN = re.compile(r"(\d+) ([a-z]+) damage")
# Expects lower-cased text. Groups: dc, save ability, save damage dice, save damage type and,
# only when present, the "half as much damage on a successful one" clause.
# The last group is optional so that its presence can actually be tested by the caller.
SAVING_THROW_PATTERN = re.compile(r"the target must make a dc (\d+) ([a-z]+) saving throw, taking \d+ "
                                  r"\((\d+d\d+)\) ([a-z]+) damage on a failed save(, or half as much damage on a successful one)?")
# Groups: hit point threshold, dc, save ability
SAVEORDIE_PATTERN = re.compile(r"If the target is a creature that has (\d+) hit points or fewer, it must succeed on a DC (\d+) (\w+) saving throw or die\.")
# Index in this list == numeric value of the word.
NUMBER_WORDS = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"]
# Fractional challenge ratings.
FRACTION_MAP = {"1/8": 0.125, "1/4": 0.25, "1/2": 0.5}
# Expects lower-cased text. Groups: spell level digit, school, "ritual" when tagged as one.
LEVEL_SCHOOL_PATTERN = re.compile(r"(\d)(?:(?:st)|(?:nd)|(?:rd)|(?:th))-level ([a-z]+)(?: \((ritual)\))?")
# Expects lower-cased text. Group: school of the cantrip.
CANTRIP_PATTERN = re.compile(r"([a-z]+) cantrip")
[docs]
def get_attack_from_text(text):  # pylint: disable=too-many-nested-blocks
    """
    Get the information about an attack from a text description

    The shape of the result depends on the text:

    * "Melee or Ranged Weapon Attack: ..." gives a dict with ``melee_kwargs``, ``range_kwargs`` and
      ``range_disadv_kwargs`` (plus ``versatile_kwargs`` when a two-handed damage is given). Each value
      is a string of keyword arguments meant to be pasted into generated code.
    * Any other text containing "Weapon Attack:" gives a dict with ``attack_mod`` and ``dice`` and,
      when they can be found, ``melee_range``, ``range``, ``versatile_kwargs``, ``more_content``,
      ``attack_type`` and ``extra_kwargs``.
    * Text without "Weapon Attack:" gives an empty dict.

    :param text: the text to read
    :return: the info about the attack found, or None if a weapon attack has no damage that can be parsed
    :raise ValueError: if a melee/ranged attack mentions two hands but its damage cannot be split
    """
    result = {}
    melee_ranged_match = MELEE_OR_RANGED_PATTERN.search(text)
    if melee_ranged_match:
        attack_mod, reach, normal_range, disadv_range = melee_ranged_match.groups()
        # Supported damage layouts:
        #   [melee damage info] in melee or [range damage info] at range
        #   [melee or ranged damage info], or [two-handed damage info] if used with two hands to make a melee attack
        damage_dice_versatile = None
        melee_idx = text.find("in melee")
        if melee_idx != -1:
            damage_dice_melee = get_damage_dice(text[:melee_idx])
            damage_dice_range = get_damage_dice(text[melee_idx:])
        elif text.find("with two hands") == -1:
            # same damage for melee or ranged
            damage_dice_melee = get_damage_dice(text)
            damage_dice_range = damage_dice_melee
        else:
            split_idx = text.find("damage, or")
            if split_idx == -1:
                raise ValueError("Don't know how to parse the damage for this melee/ranged attack")
            split_idx += 6  # keep the word "damage" in the first half
            normal_damage = get_damage_dice(text[:split_idx])
            damage_dice_melee = normal_damage
            damage_dice_range = normal_damage
            damage_dice_versatile = get_damage_dice(text[split_idx:])
        result["melee_kwargs"] = f"damage_dice={damage_dice_melee}, attack_mod={attack_mod}, melee_range={reach}"
        result["range_kwargs"] = f"damage_dice={damage_dice_range}, attack_mod={attack_mod}, range={normal_range}"
        result["range_disadv_kwargs"] = f"damage_dice={damage_dice_range}, attack_mod={attack_mod}, " \
                                        f"range={disadv_range}"
        if damage_dice_versatile:
            result["versatile_kwargs"] = f"damage_dice={damage_dice_versatile}, attack_mod={attack_mod}, melee_range={reach}"
        return result

    if "Weapon Attack:" not in text:
        return result  # not an attack: the empty dict tells the caller to keep the plain description

    to_hit, range_info = text.split(", ")[:2]  # TODO: care about number of targets
    to_hit = to_hit.split(" to hit")[0].split("Weapon Attack:")[1].strip()
    sign = to_hit[0]
    if sign == "+":
        to_hit = int(to_hit[1:])
    elif sign == "-":
        to_hit = int(to_hit[1:]) * -1
    result["attack_mod"] = to_hit

    range_match = REACH_RANGE_PATTERN.search(range_info)
    if range_match:
        if range_match.group(1):
            result["melee_range"] = range_match.group(2)
        if range_match.group(3):
            result["range"] = (range_match.group(4), range_match.group(5))

    if "Hit: The target must make" in text:
        # no damage on the hit itself; everything after "Hit: " is examined below
        result["dice"] = "dice.NullDamageDice()"
        result["more_content"] = text.split("Hit: ")[1]
    else:
        try:
            result["dice"] = get_damage_dice(text)
        except AttributeError as error:
            print(text)
            raise error
        except ValueError as error:
            print('Attack with no damage', error)
            return None
        if "two hands" in text:
            damage_dice_versatile = get_damage_dice(text.split(", or")[1])
            # "damage_dice=" (not "dice=") so the kwargs match every other generated attack
            result["versatile_kwargs"] = f"damage_dice={damage_dice_versatile}, attack_mod={to_hit}, " \
                                         f"melee_range={range_match.group(2)}"
        and_idx = text.find("damage, and")
        period_idx = text.find("damage.")
        if and_idx != -1:
            result["more_content"] = text[and_idx + 12:]  # skip "damage, and" and the following space
        elif period_idx != -1 and period_idx + 7 < len(text):
            result["more_content"] = text[period_idx + 7:]  # whatever follows "damage."

    if "more_content" in result:  # check to see if this matches anything we know
        saveordie_match = SAVEORDIE_PATTERN.search(result["more_content"])
        if saveordie_match:
            result["attack_type"] = "SaveOrDie"
            result["extra_kwargs"] = f", threshold={int(saveordie_match.group(1))+1}, dc={saveordie_match.group(2)}, " \
                                     f"save_type='{saveordie_match.group(3).lower()}'"
            del result["more_content"]
        else:
            saving_throw_match = SAVING_THROW_PATTERN.search(result["more_content"].lower())
            if saving_throw_match:
                result["attack_type"] = "HitAndSaveAttack"
                result["extra_kwargs"] = f", dc={saving_throw_match.group(1)}, save_type='{saving_throw_match.group(2)}', " \
                                         f"save_damage_dice='{saving_throw_match.group(3)}', " \
                                         f"save_damage_type='{saving_throw_match.group(4)}', " \
                                         f"damage_on_success={bool(saving_throw_match.group(5))}"
                del result["more_content"]
    return result
[docs]
def _mix_multiattack(mix_ma):
    """Expand a MA_MIX_PATTERN match into (base attack list, generated alternate/replacement lines)."""
    total_attack_num = NUMBER_WORDS.index(mix_ma.group(1))
    attack_list = []
    generated = []
    sum_attack_num = 0
    attack_num = 0
    idx = 2  # groups come in (number word, attack name) pairs starting at group 2
    while sum_attack_num < total_attack_num:
        try:
            attack_num = NUMBER_WORDS.index(mix_ma.group(idx))
            attack_type = mix_ma.group(idx + 1)
        except ValueError:  # no match
            idx += 2
            continue
        attack_list.extend([attack_type] * attack_num)
        sum_attack_num += attack_num
        idx += 2

    if mix_ma.group(9):  # if we had an "or", as in "one with its pike and one with its hooves or two with its longbow."
        alt_name = mix_ma.group(9)
        alt_num = mix_ma.group(8)
        if alt_num:
            alt_num = NUMBER_WORDS.index(alt_num)
        else:
            alt_num = attack_num  # no number specified, so use the same number as the previous attack
        if alt_num < total_attack_num:  # e.g., The sahuagin makes two melee attacks: one with its bite and one with its claws or spear.
            # swap the last alt_num attacks of a copy of the list for the alternative attack
            attack_list2 = attack_list[:len(attack_list) - alt_num]
            attack_list2.extend([alt_name] * alt_num)
        else:  # assumption: alt_num == total_attack_num
            attack_list2 = [alt_name] * alt_num
        generated.append("multiattack_alt = attack_class.MultiAttack(name=\"Multiattack (alternate)\", "
                         f"attack_list={attack_list2})")

    if mix_ma.group(10):  # "It can make N X attack in place of its M Y attacks"
        attack_list3 = attack_list[:]
        for _ in range(NUMBER_WORDS.index(mix_ma.group(12))):
            try:
                attack_list3.remove(mix_ma.group(13))
            except ValueError:
                attack_list3.remove(mix_ma.group(13) + "s")  # the list may hold the plural form
        for _ in range(NUMBER_WORDS.index(mix_ma.group(10))):
            attack_list3.append(mix_ma.group(11))
        generated.append("multiattack_rep = attack_class.MultiAttack(name=\"Multiattack (replacement)\", "
                         f"attack_list={attack_list3})")
    return attack_list, generated


def _either_multiattack(ma_either_match):
    """Expand a MA_EITHER_PATTERN match into one generated line per attack choice."""
    generated = []
    total_attack_num = NUMBER_WORDS.index(ma_either_match.group(1))
    ma_one = ma_either_match.group(2)
    ma_one_list = [ma_one] * total_attack_num
    ma_two = ma_either_match.group(3)
    ma_two_list = [ma_two] * total_attack_num
    generated.append(f"multiattack_{ma_one.lower()} = "
                     f"attack_class.MultiAttack(name=\"Multiattack ({ma_one})\", "
                     f"attack_list={ma_one_list})")
    generated.append(f"multiattack_{ma_two.lower()} = "
                     f"attack_class.MultiAttack(name=\"Multiattack ({ma_two})\", "
                     f"attack_list={ma_two_list})")
    if ma_either_match.group(4):  # "It can replace N of those attacks with a Z attack"
        rep_num = ma_either_match.group(4)
        ma_three = ma_either_match.group(5)
        ma_one_three = ma_one_list[:]
        ma_two_three = ma_two_list[:]
        for _ in range(NUMBER_WORDS.index(rep_num)):
            ma_one_three.remove(ma_one)
            ma_one_three.append(ma_three)
            ma_two_three.remove(ma_two)
            ma_two_three.append(ma_three)
        generated.append(
            f"multiattack_{ma_one.lower()}_{ma_three.lower()} = "
            f"attack_class.MultiAttack(name=\"Multiattack ({ma_one} and {ma_three})\", "
            f"attack_list={ma_one_three})")
        generated.append(
            f"multiattack_{ma_two.lower()}_{ma_three.lower()} = "
            f"attack_class.MultiAttack(name=\"Multiattack ({ma_two} and {ma_three})\", "
            f"attack_list={ma_two_three})")
    return generated


def _complex_multiattack(ma_complex_match):
    """Expand a MA_COMPLEX_PATTERN match ("either A and B - or C") into two generated lines."""
    one_1 = NUMBER_WORDS.index(ma_complex_match.group(2))
    one_1_type = ma_complex_match.group(3).replace(" ", "_")
    one_2 = NUMBER_WORDS.index(ma_complex_match.group(4))
    one_2_type = ma_complex_match.group(5).replace(" ", "_")
    list_1 = [one_1_type] * one_1
    list_1.extend([one_2_type] * one_2)
    total_2 = NUMBER_WORDS.index(ma_complex_match.group(6))
    two_1_type = ma_complex_match.group(7).replace(" ", "_")
    return [
        f"multiattack_{one_1_type}_{one_2_type} = "
        f"attack_class.MultiAttack(name=\"Multiattack ({one_1_type} and {one_2_type})\", "
        f"attack_list={list_1})",
        f"multiattack_{two_1_type} = "
        f"attack_class.MultiAttack(name=\"Multiattack ({two_1_type})\", "
        f"attack_list={[two_1_type] * total_2})",
    ]


def get_multiattacks_from_text(text):
    """
    Turn the description of a Multiattack action into lines of generated code

    The text is tried against the multiattack patterns in order: a single repeated attack, a mix of
    different attacks, a choice between two attacks, and finally the "either ... or ..." form (matched
    against the lower-cased text). Only the first pattern that matches is used.

    :param text: the description of the multiattack
    :return: a list of strings, each one an ``attack_class.MultiAttack(...)`` assignment; the plain
        ``multiattack = ...`` line, when there is one, comes last. Empty if nothing matched.
    :raise ValueError: if a number word in the text is not in NUMBER_WORDS (outside the mixed-attack pairing loop)
    """
    multiattacks = []
    attack_list = []
    single_ma = MA_SINGLE_PATTERN.search(text)
    if single_ma:
        attack_num = NUMBER_WORDS.index(single_ma.group(1))
        attack_list = [single_ma.group(2)] * attack_num
    else:
        mix_ma = MA_MIX_PATTERN.search(text)
        if mix_ma:
            attack_list, generated = _mix_multiattack(mix_ma)
            multiattacks.extend(generated)
        else:
            ma_either_match = MA_EITHER_PATTERN.search(text)
            if ma_either_match:
                multiattacks.extend(_either_multiattack(ma_either_match))
            else:
                ma_complex_match = MA_COMPLEX_PATTERN.search(text.lower())
                if ma_complex_match:
                    multiattacks.extend(_complex_multiattack(ma_complex_match))
    if attack_list:
        multiattacks.append(f"multiattack = attack_class.MultiAttack(name=\"Multiattack\", attack_list={attack_list})")
    return multiattacks
[docs]
def get_damage_dice(text: str) -> str:
    """
    Get the string that corresponds to the Python code to create the correct DamageDice or DamageDiceBag for the given damage description

    The text is lower-cased before matching. A dice expression such as "7 (1d8 + 3) piercing damage" is
    preferred; if none is found, a flat amount such as "1 piercing damage" is accepted and written as
    that many one-sided dice, so the generated dice always total the stated amount.

    :param text: text describing damage (e.g., "7 (2d6) piercing damage")
    :return: source code for a ``dice.DamageDice(...)``, or for a ``dice.DamageDiceBag(...)`` holding two
        of them when the text has the form "... damage plus ... damage"
    :raise ValueError: if no damage can be found in the text
    """
    def dice_code(dice_num, dice_type, damage_mod, damage_type):
        """Format one DamageDice constructor call."""
        if damage_mod is None:
            damage_mod = 0
        return f"dice.DamageDice(dice_num={dice_num}, dice_type={dice_type}, modifier={damage_mod}, " \
               f"damage_type='{damage_type}')"

    lowered = text.lower()
    damage_match = DAMAGE_PATTERN.search(lowered)
    if not damage_match:
        single_damage_match = SINGLE_DAMAGE_PATTERN.search(lowered)
        if not single_damage_match:
            raise ValueError(f"Don't know what kind of damage this is: {text}")
        # flat damage N is N dice with a single side each (not N dice with N sides)
        return dice_code(single_damage_match.group(1), 1, 0, single_damage_match.group(2))

    first_dice = dice_code(*damage_match.group(1, 2, 3, 4))
    if damage_match.group(5):  # a second damage expression after "plus"
        second_dice = dice_code(*damage_match.group(5, 6, 7, 8))
        return f"dice.DamageDiceBag(dice_list=[{first_dice}, {second_dice}])"
    return first_dice
[docs]
def write_attack_multiattack(attack_stats, multiattacks, indent_size, outfile):
    """
    Write the generated code (or plain description) for each attack, then the multiattack lines

    :param attack_stats: maps an attack name either to a plain description (str) or to a dict of parsed
        attack info. The dict keys read here are ``attack_type``, ``extra_kwargs``, ``melee_kwargs``,
        ``versatile_kwargs``, ``range_kwargs``, ``range_disadv_kwargs``, ``melee_range``, ``range``,
        ``dice``, ``attack_mod`` and ``more_content``.
    :param multiattacks: strings to write after the attacks; every single quote is removed from them
    :param indent_size: number of spaces in one indentation level of the generated code
    :param outfile: object with a ``write`` method that receives the output
    :return: None
    """
    indent = " " * 2 * indent_size
    # (key in the stats dict, suffix added to both the variable name and the attack's display name)
    kwargs_variants = (
        ("melee_kwargs", ""),
        ("versatile_kwargs", "_versatile"),
        ("range_kwargs", "_range"),
        ("range_disadv_kwargs", "_range_disadvantage"),
    )
    for attack_name, stats in attack_stats.items():
        var_attack_name = attack_name.lower().replace(" ", "_").replace("(", "").replace(")", "")
        if isinstance(stats, str):
            if "Multiattack" not in stats:
                outfile.write(indent + f'{attack_name}. {stats}\n')  # write out the description
            continue

        # we have an attack
        attack_type = stats.get("attack_type", "Attack")
        extra_kwargs = stats.get("extra_kwargs", "")
        for key, suffix in kwargs_variants:
            if key in stats:
                outfile.write(indent + f"{var_attack_name}{suffix} = "
                              f"attack_class.{attack_type}({stats[key]}, "
                              f"name='{attack_name}{suffix}'{extra_kwargs})\n")
        if "melee_range" in stats:
            outfile.write(
                indent + f"{var_attack_name} = "
                f"attack_class.{attack_type}(damage_dice={stats['dice']}, "
                f"attack_mod={stats['attack_mod']}, "
                f"melee_range={stats['melee_range']}, "
                f"name='{attack_name}'{extra_kwargs})\n")
        if "range" in stats:
            outfile.write(
                indent + f"{var_attack_name}_range = "
                f"attack_class.{attack_type}(damage_dice={stats['dice']}, "
                f"attack_mod={stats['attack_mod']}, "
                f"range={stats['range'][0]}, name='{attack_name}_range'{extra_kwargs})\n")
            # variable name now matches the "_range_disadvantage" used in name= and for melee-or-ranged attacks
            outfile.write(
                indent + f"{var_attack_name}_range_disadvantage = "
                f"attack_class.{attack_type}(damage_dice={stats['dice']}, "
                f"attack_mod={stats['attack_mod']}, "
                f"range={stats['range'][1]}, "
                f"name='{attack_name}_range_disadvantage'{extra_kwargs})\n")
        more_content = stats.get("more_content")
        if more_content:
            # unparsed remainder of the description, written one level deeper as a comment
            outfile.write(f"{' ' * 3 * indent_size}# {more_content}\n")
    for attack_str in multiattacks:
        # drop the quotes so the attack names in the list refer to the variables written above
        outfile.write(indent + "{}\n".format(attack_str.replace("'", "")))  # pylint: disable=consider-using-f-string
[docs]
def read_dnd5einfo_monster_url(url, outname, indent_size=4):
    """
    Download a monster page and append the code generated from it to a file

    :param url: address of the monster page
    :param outname: path of the file to append to
    :param indent_size: number of spaces in one indentation level of the generated code
    :return: None
    """
    # a browser-like User-Agent header is sent with the request
    page_req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(page_req) as response:  # close the connection once the page is read
        webpage = response.read()
    soup = BeautifulSoup(webpage, 'lxml')
    read_dnd5einfo_monster_soup(soup, outname, indent_size)
[docs]
def _split_damage_types(raw):
    """Split the text following a resistances/immunities/vulnerabilities label into its entries."""
    cleaned = raw.replace(";", ",")
    # "..., and slashing from nonmagical weapons" -> keep only the damage type
    cleaned = re.sub(r"and (\w+) from nonmagical weapons", r"\1", cleaned)
    return cleaned.split(", ")


def _parse_challenge_rating(cr_text):
    """Return the first challenge rating in cr_text: an int, or a FRACTION_MAP value for "1/8", "1/4", "1/2"."""
    # take the whole "n" or "n/m" token, not just its first run of digits
    cr_token = re.search(r"\d+(?:/\d+)?", cr_text).group(0)
    try:
        return int(cr_token)
    except ValueError:
        return FRACTION_MAP[cr_token]


def read_dnd5einfo_monster_soup(soup, outname, indent_size=4):
    """
    Append the source code of a ``combatant.Creature`` subclass for one parsed monster page to a file

    The generated ``__init__`` builds ``default_kwargs`` from the size, creature type, AC, hit points,
    speeds, ability scores, saving throw proficiencies, resistances, immunities, vulnerabilities,
    vision and challenge rating found on the page. Features, actions, legendary actions and reactions
    are written inside triple-quoted strings, with attacks rendered by write_attack_multiattack.

    :param soup: the parsed page (presumably a BeautifulSoup document of a dnd5e.info monster page)
    :param outname: path of the file to append to (opened as UTF-8)
    :param indent_size: number of spaces in one indentation level of the generated code
    :return: None
    :raise ValueError: if an unexpected h4 heading is found
    """
    indent_1 = " " * indent_size
    indent_2 = " " * 2 * indent_size
    indent_3 = " " * 3 * indent_size
    with open(outname, "a", encoding='utf8') as outfile:  # closed even if parsing fails part-way
        soup = soup.body
        name = re.sub(INVALID_NAME_PATTERN, "", soup.div.h1.get_text())  # remove unwanted characters from name
        outfile.write(f"class {name}(combatant.Creature):\n")
        outfile.write(indent_1 + "def __init__(self, **kwargs):\n")
        outfile.write(indent_2 + "default_kwargs = {")
        content = soup.find("div", class_="pad group").article.div
        paragraphs = content.find_all('p')

        # get size and race
        size_race_alignment = paragraphs[0].get_text().lower()
        size_race_alignment_match = SIZE_RACE_ALIGNMENT_PATTERN.search(size_race_alignment)
        size = size_race_alignment_match.group(1).lower()
        race = size_race_alignment_match.group(2)
        outfile.write(f'"size": "{size}", "creature_type": "{race}", ')

        # get ac, hp, and speed: each value is the text right after its <strong> label
        ac, hp, speed = [x.next_sibling for x in paragraphs[1].find_all("strong")]
        ac = DIGIT_PATTERN.search(ac).group(1)
        hp = DIGIT_PATTERN.search(hp).group(1)
        speed, climb_speed, fly_speed, swim_speed = SPEED_PATTERN.search(speed).groups()
        outfile.write(f"'ac': {ac}, 'max_hp': {hp}, 'speed': {speed}, ")
        if climb_speed:
            outfile.write(f"'climb_speed': {climb_speed}, ")
        if fly_speed:
            outfile.write(f"'fly_speed': {fly_speed}, ")
        if swim_speed:
            outfile.write(f"'swim_speed': {swim_speed}, ")
        outfile.write("\n")

        # get attributes: the second row of the first table holds the six ability scores
        attribute_table = soup.table.find_all("tr")[1]
        strength, dexterity, constitution, intelligence, wisdom, charisma = [
            DIGIT_PATTERN.search(x.get_text()).group(1) for x in attribute_table.find_all("td")]
        outfile.write(indent_3 +
                      f'"strength": {strength}, "dexterity": {dexterity}, "constitution": {constitution}, '
                      f'"intelligence": {intelligence}, "wisdom": {wisdom}, "charisma": {charisma}}}\n')

        # get saving throws, skills, senses, resistances, immunities, vulnerabilities, and cr
        details = paragraphs[2]
        proficiencies = set()
        resistances = set()
        immunities = set()
        vulnerabilities = set()
        vision = "normal"
        cr = 0
        for element in details.find_all("strong"):
            label = element.get_text().lower()
            if label == "saving throws":
                for str_value in str(element.next_sibling).split(", "):
                    ability, _mod = str_value.split()  # e.g. "Dex +4"; only the ability is kept
                    proficiencies.add(ability_from_abbreviation(ability.lower()))
            elif "resistances" in label:
                resistances.update(_split_damage_types(str(element.next_sibling)))
            elif "immunities" in label:
                immunities.update(_split_damage_types(str(element.next_sibling)))
            elif "vulnerabilities" in label:
                vulnerabilities.update(_split_damage_types(str(element.next_sibling)))
            elif label == "senses":
                senses = element.next_sibling
                # keep only the first of these senses that is listed
                if "truesight" in senses:
                    vision = "truesight"
                elif "blindsight" in senses:
                    vision = "blindsight"
                elif "darkvision" in senses:
                    vision = "darkvision"
            elif label == "challenge":
                cr = _parse_challenge_rating(str(element.next_sibling).strip())

        # each non-empty set is written under its own key
        for key, values in (("proficiencies", proficiencies), ("resistances", resistances),
                            ("immunities", immunities), ("vulnerabilities", vulnerabilities)):
            if values:
                outfile.write(indent_2 + f'default_kwargs.update({{"{key}": {values}}})\n')
        outfile.write(indent_2 + f"default_kwargs.update({{'vision': \"{vision}\", 'cr': {cr}}})\n")

        # features: every paragraph up to the "Actions" heading, kept as text inside a string
        outfile.write(indent_2 + "# Features\n")
        outfile.write(indent_2 + '"""\n')
        for sibling in details.next_siblings:
            if sibling.name == 'p':
                outfile.write(indent_2 + sibling.get_text() + "\n")
            elif sibling.name == 'h4':
                if sibling.get_text() == "Actions":
                    outfile.write(indent_2 + '"""\n')
                    outfile.write(indent_2 + "# Actions\n")
                    outfile.write(indent_2 + '"""\n')
                    break
                raise ValueError("unknown h4")

        attack_stats = {}
        legendary_attack_stats = {}
        multiattacks = []
        legendary_multiattacks = []
        legendary = False
        skip_next = False
        reactions = False
        # continue from the "Actions" heading the previous loop stopped at
        for action in sibling.next_siblings:  # pylint: disable=undefined-loop-variable
            if skip_next:
                skip_next = False
                continue
            if action.name == 'p':
                if not legendary:
                    my_dict = attack_stats
                    multi = multiattacks
                else:
                    my_dict = legendary_attack_stats
                    multi = legendary_multiattacks
                try:
                    name_element = action.em
                    attack_name = name_element.get_text().replace(".", "")
                    text = str(name_element.next_sibling).strip().replace(NON_BREAK_SPACE, ' ')
                    alt_text = action.get_text().replace(name_element.get_text(), "").strip()
                    if not text or len(alt_text) > len(text):
                        text = alt_text.replace(NON_BREAK_SPACE, ' ')
                except AttributeError:
                    # no <em> name in this paragraph: it continues the previous entry
                    text = action.get_text().strip().replace(NON_BREAK_SPACE, ' ')
                    try:
                        my_dict[attack_name] += " " + text  # the name of the previous attack
                    except TypeError:
                        # the previous entry is a parsed attack (a dict), not a description
                        if "more_content" in my_dict[attack_name]:
                            my_dict[attack_name]["more_content"] += text
                        else:
                            my_dict[attack_name]["more_content"] = text
                    continue
                if "Multiattack" in attack_name:
                    multi.extend(get_multiattacks_from_text(text))
                else:
                    result = get_attack_from_text(text)
                    if result:
                        my_dict[attack_name] = result
                    else:  # if the result was empty (i.e., we found something that could not be parsed as an attack)
                        my_dict[attack_name] = text  # just give the description
            elif action.name == 'h4':
                if action.get_text() == "Legendary Actions":
                    legendary = True
                    skip_next = True  # the element right after this heading is not parsed
                elif action.get_text() == "Reactions":
                    reactions = True
                    break
                else:
                    raise ValueError("unknown h4")
            elif action.name == "div":
                break

        write_attack_multiattack(attack_stats, multiattacks, indent_size, outfile)
        outfile.write(indent_2 + '"""\n')
        if legendary:
            outfile.write(indent_2 + "# Legendary Actions\n")
            outfile.write(indent_2 + '"""\n')
            write_attack_multiattack(legendary_attack_stats, legendary_multiattacks, indent_size, outfile)
            outfile.write(indent_2 + '"""\n')

        if reactions:
            my_dict = {}
            multi = []
            outfile.write(indent_2 + "# Reactions\n")
            outfile.write(indent_2 + '"""\n')
            # continue from the "Reactions" heading the actions loop stopped at
            for reaction in action.next_siblings:  # pylint: disable=undefined-loop-variable
                try:
                    name_element = reaction.em
                    attack_name = name_element.get_text().replace(".", "")
                    text = str(name_element.next_sibling).strip()
                    if not text:
                        text = reaction.get_text()
                        text = text.replace(name_element.get_text(), "")
                        text = text.strip()
                except AttributeError:
                    text = str(reaction).strip()
                    my_dict[attack_name] += " " + text  # the name of the previous attack
                    continue
                if "Multiattack" in attack_name:
                    multi.extend(get_multiattacks_from_text(text))
                else:
                    result = get_attack_from_text(text)
                    if result:
                        my_dict[attack_name] = result
                    else:  # if the result was empty (i.e., we found something that could not be parsed as an attack)
                        my_dict[attack_name] = text  # just give the description
            write_attack_multiattack(my_dict, multi, indent_size, outfile)
            outfile.write(indent_2 + '"""\n')

        outfile.write(indent_2 + "kwargs.update(default_kwargs)\n")
        outfile.write(indent_2 + "super().__init__(**kwargs)\n")
        outfile.write("\n")
[docs]
def read_dnd5einfo_spell_soup(soup, outname, indent_size=4):
    """
    Append the source code of an ``attack_class.Spell`` subclass for one parsed spell page to a file

    The generated ``__init__`` builds ``default_kwargs`` from the level, school, ritual tag, casting
    time, range, duration and components found on the page; the description paragraphs are written
    inside a triple-quoted string.

    :param soup: the parsed page (presumably a BeautifulSoup document of a dnd5e.info spell page)
    :param outname: path of the file to append to (opened as UTF-8, like the monster writer)
    :param indent_size: number of spaces in one indentation level of the generated code
    :return: None
    """
    indent_1 = " " * indent_size
    indent_2 = " " * indent_size * 2
    with open(outname, 'a', encoding='utf8') as f_out:  # closed even if parsing fails part-way
        soup = soup.body
        name = re.sub(INVALID_NAME_PATTERN, "", soup.div.h1.get_text())  # remove unwanted characters from name
        f_out.write(f"class {name}(attack_class.Spell):\n")
        f_out.write(indent_1 + "def __init__(self, **kwargs):\n")
        content = soup.find("div", class_="pad group").article.div
        paragraphs = content.find_all('p')

        # first paragraph: "<n>th-level <school> (ritual)" or "<school> cantrip"
        level_school_text = paragraphs[0].get_text().lower()
        level_school_match = LEVEL_SCHOOL_PATTERN.search(level_school_text)
        ritual = False
        if level_school_match:
            level = level_school_match.group(1)
            school = level_school_match.group(2)
            ritual = bool(level_school_match.group(3))
        else:
            cantrip_match = CANTRIP_PATTERN.search(level_school_text)
            level = 0
            school = cantrip_match.group(1)
        ritual_str = ", 'ritual': True" if ritual else ""
        f_out.write(indent_2 + f"default_kwargs = {{'level': {level}, 'school': '{school}'{ritual_str}}}\n")

        # third paragraph: four <strong> labels, each followed by ": <value>"
        casting, range_, components, duration = paragraphs[2].find_all("strong")
        casting = str(casting.next_sibling)[2:]  # e.g., ": 1 action" becomes "1 action"
        range_ = str(range_.next_sibling)[2:].split()[0]
        if not range_.isdigit():
            range_ = f"'{range_.lower()}'"  # non-numeric ranges are written as quoted strings
        components = str(components.next_sibling)[2:]
        components = [c[0].lower() for c in components.split(", ")]  # first letter of each component
        duration = str(duration.next_sibling)[2:].lower()
        f_out.write(indent_2 + f"default_kwargs.update({{'casting': '{casting}', 'range': {range_}, "
                    f"'duration': '{duration}', 'components': {components}}})\n")

        f_out.write(indent_2 + '# Description\n')
        f_out.write(indent_2 + '"""\n')
        for paragraph in paragraphs[3:]:
            text = paragraph.get_text()
            if text.startswith("Material"):
                continue
            f_out.write(indent_2 + text + "\n")
        f_out.write(indent_2 + '"""\n')
        f_out.write(indent_2 + "kwargs.update(default_kwargs)\n")
        f_out.write(indent_2 + "super().__init__(**kwargs)\n")
        f_out.write("\n")
[docs]
def read_dnd5einfo_spell_url(url, outname, indent_size=4):
    """
    Download a spell page and append the code generated from it to a file

    :param url: address of the spell page
    :param outname: path of the file to append to
    :param indent_size: number of spaces in one indentation level of the generated code
    :return: None
    """
    # a browser-like User-Agent header is sent with the request
    page_req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(page_req) as response:  # close the connection once the page is read
        webpage = response.read()
    soup = BeautifulSoup(webpage, 'lxml')
    read_dnd5einfo_spell_soup(soup, outname, indent_size)
[docs]
def read_dnd5einfo_spell_page(outname, indent_size=4, first_url=None):
    """
    Generate code for every spell linked from the dnd5e.info spell index

    Each processed link is printed and then handed to read_dnd5einfo_spell_url.

    :param outname: path of the file to append to
    :param indent_size: number of spaces in one indentation level of the generated code
    :param first_url: if given, links before this one are skipped; processing starts with this link
    :return: None
    """
    index_req = urllib.request.Request("https://dnd5e.info/spellcasting/spell/",
                                       headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(index_req) as response:  # close the connection once the page is parsed
        soup = BeautifulSoup(response, 'lxml')
    spell_list = soup.body.div.find("div", id="page").div.div.div.section.find("div", class_='pad group').article.div.ul
    found_first_url = not first_url  # nothing to skip when no starting link was requested
    for spell_element in spell_list.find_all("li"):
        url = str(spell_element.a['href'])
        if not found_first_url:
            if url != first_url:
                continue
            found_first_url = True
        print(url)
        read_dnd5einfo_spell_url(url, outname, indent_size)
[docs]
def read_dnd5einfo_monster_page(outname, indent_size=4, start_url=None, stop_url=None):
    """
    Generate code for the monsters linked from the dnd5e.info monster index

    Each processed link is printed and then handed to read_dnd5einfo_monster_url.

    :param outname: path of the file to append to
    :param indent_size: number of spaces in one indentation level of the generated code
    :param start_url: if given, links before this one are skipped; processing starts with this link
    :param stop_url: if given, processing stops before this link. It is only compared against links
        that come after the starting link.
    :return: None
    """
    index_req = urllib.request.Request("https://dnd5e.info/monsters/monster/",
                                       headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(index_req) as response:  # close the connection once the page is parsed
        soup = BeautifulSoup(response, 'lxml')
    monster_list = soup.body.div.find("div", id="page").div.div.div.section.find("div", class_='pad group').article.div.ul
    found_first_url = not start_url  # nothing to skip when no starting link was requested
    for monster_element in monster_list.find_all("li"):
        url = str(monster_element.a['href'])
        if not found_first_url:
            if url != start_url:
                continue
            found_first_url = True
        elif stop_url == url:
            break
        print(url)
        read_dnd5einfo_monster_url(url, outname, indent_size)
if __name__ == "__main__":  # pragma: no cover
    # Script entry point: append the generated class for a single monster to the bestiary module.
    read_dnd5einfo_monster_url(
        url='https://dnd5e.info/monsters/monster/zombie',
        outname='../bestiary/__init__.py',
    )
    # Other invocations, kept for reference:
    # read_dnd5einfo_monster_soup(BeautifulSoup(open("Satyr.html"), 'lxml'), '../bestiary/__init__.py')
    # read_dnd5einfo_monster_page('../bestiary/__init__.py', start_url="https://dnd5e.info/monsters/monster/dragon-silver-wyrmling/",
    #                             stop_url="https://dnd5e.info/monsters/monster/drider/")
    # read_dnd5einfo_spell_page('../spell_list/__init__.py', first_url="https://dnd5e.info/spellcasting/spell/blur/")
    # read_dnd5einfo_spell_url("https://dnd5e.info/spellcasting/spell/black-tentacles/", '../spell_list/__init__.py')
    # split_d20htm_read("Skeletons.html", 'h2')
    # read_from_d20srd("http://5e.d20srd.org/srd/monsters/ghost.htm", "../bestiary/__init__.py")
    # read_from_d20srdhtml(BeautifulSoup(open("Warhorse Skeleton"), "lxml"), "../bestiary/__init__.py")