-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #38 from Schmeitzke/main
Add Simple and Advanced Geometry Dataset Generators
- Loading branch information
Showing
5 changed files
with
558 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from .simple_geometry import SimpleGeometryConfig, SimpleGeometryDataset | ||
from .advanced_geometry import AdvancedGeometryConfig, AdvancedGeometryDataset | ||
|
||
# Names re-exported from the .simple_geometry and .advanced_geometry modules
# imported above; this list is what `from <package> import *` exposes.
__all__ = [
    "SimpleGeometryConfig",
    "SimpleGeometryDataset",
    "AdvancedGeometryConfig",
    "AdvancedGeometryDataset",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,229 @@ | ||
import random | ||
from dataclasses import dataclass | ||
from typing import Optional, List | ||
|
||
import sympy | ||
from sympy.geometry import Point, Triangle, Segment | ||
|
||
from ..factory import ProceduralDataset, register_dataset | ||
|
||
|
||
@dataclass
class AdvancedGeometryConfig:
    """
    Configuration for generating advanced geometry tasks.

    Leaving ``task_types`` as ``None`` selects every supported task category.
    Call :meth:`validate` to check the values; it reports problems with
    ``AssertionError``.
    """

    # Task categories a config may name. Deliberately left un-annotated so the
    # dataclass machinery treats it as a plain class attribute, not a field.
    _SUPPORTED_TASK_TYPES = ("orthocenter", "incircle_radius", "angle_measure")

    min_coord: int = -10  # Minimum x/y coordinate
    max_coord: int = 10  # Maximum x/y coordinate
    size: int = 50  # Number of problems to generate
    seed: Optional[int] = None  # Random seed

    # Task categories to sample from; None means "all supported categories"
    task_types: Optional[List[str]] = None

    def __post_init__(self):
        """Fill in the default task list when none was supplied."""
        if self.task_types is None:
            # Build a fresh list per instance so configs never share state
            self.task_types = list(self._SUPPORTED_TASK_TYPES)

    def validate(self):
        """
        Check that the configuration is usable.

        Raises:
            AssertionError: if the coordinate range is empty or inverted, the
                size is not positive, no task type is given, or a task type is
                not one of the supported categories.
        """
        assert self.min_coord < self.max_coord, "min_coord must be < max_coord."
        assert self.size > 0, "Size of dataset must be positive."
        assert len(self.task_types) > 0, "Must specify at least one task type."
        # Reject unknown names here instead of failing later while an item is
        # being generated.
        unknown = [t for t in self.task_types if t not in self._SUPPORTED_TASK_TYPES]
        assert not unknown, f"Unknown task types: {unknown}."
|
||
|
||
class AdvancedGeometryDataset(ProceduralDataset):
    """
    A dataset for advanced geometry tasks using coordinate geometry.

    Each item is built from a random non-degenerate triangle with integer
    vertices and asks for one of: the orthocenter, the incircle radius, or the
    angle at vertex B. Items are dicts with ``question``, ``answer`` and
    ``metadata`` keys.
    """

    def __init__(self, config: AdvancedGeometryConfig):
        # Prompt variants per task type; one is picked at random for each item.
        self._prompt_templates = {
            "orthocenter": [
                "Given triangle ABC with coordinates A={A}, B={B}, and C={C}, find the coordinates of its orthocenter.",
                "For triangle with vertices A={A}, B={B}, and C={C}, determine the orthocenter (intersection of altitudes).",
            ],
            "incircle_radius": [
                "Consider triangle ABC with coordinates A={A}, B={B}, and C={C}. Compute the radius of its incircle.",
                "Find the incircle radius of triangle ABC whose vertices are A={A}, B={B}, and C={C}.",
            ],
            "angle_measure": [
                "In triangle ABC with coordinates A={A}, B={B}, and C={C}, find the measure (in degrees) of angle ABC.",
                "Given a triangle with vertices A={A}, B={B}, C={C}, determine the angle at B in degrees.",
            ],
        }
        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """
        Generate a single advanced geometry item based on the config's task types.

        The generator is seeded with ``self.seed + idx``, so the same index
        yields the same item for a given seed.

        Raises:
            ValueError: if the sampled task type is not supported, or no
                non-degenerate triangle could be generated.
        """
        rng = random.Random(self.seed + idx)
        task_type = rng.choice(self.config.task_types)

        builders = {
            "orthocenter": self._build_orthocenter_task,
            "incircle_radius": self._build_incircle_radius_task,
            "angle_measure": self._build_angle_measure_task,
        }
        build = builders.get(task_type)
        if build is None:
            raise ValueError(f"Unknown task_type: {task_type}")

        # Randomly generate coordinates for a triangle
        A, B, C = self._generate_non_degenerate_triangle(rng)

        # Build a question and compute the solution
        question, answer, metadata = build(rng, A, B, C)
        return {
            "question": question,
            "answer": answer,
            "metadata": metadata,
        }

    def _generate_non_degenerate_triangle(self, rng: random.Random):
        """
        Generate a random non-degenerate triangle with integer coordinates
        in [min_coord, max_coord] x [min_coord, max_coord].

        Returns:
            A tuple ``(A, B, C)`` of sympy ``Point`` objects.

        Raises:
            ValueError: if every attempt produced three collinear points.
        """
        lo, hi = self.config.min_coord, self.config.max_coord
        max_attempts = 100
        for _attempt in range(max_attempts):
            # x is drawn before y for each vertex, so the random stream is
            # consumed in a fixed order.
            coords = [(rng.randint(lo, hi), rng.randint(lo, hi)) for _ in range(3)]
            (ax, ay), (bx, by), (cx, cy) = coords

            # Twice the signed area; zero exactly when the points are collinear.
            twice_area = ax * (by - cy) + bx * (cy - ay) + cx * (ay - by)
            if twice_area != 0:
                return tuple(Point(x, y) for x, y in coords)

        raise ValueError(f"Failed to generate a non-degenerate triangle after {max_attempts} attempts.")

    @staticmethod
    def _plain_number(value):
        """Convert a sympy coordinate to a built-in int, or a float when it is not an Integer."""
        return int(value) if value.is_Integer else float(value)

    def _vertex_metadata(self, A: Point, B: Point, C: Point) -> dict:
        """Return the ``A``/``B``/``C`` metadata entries as tuples of built-in numbers."""
        to_plain = self._plain_number
        return {
            label: (to_plain(vertex.x), to_plain(vertex.y))
            for label, vertex in (("A", A), ("B", B), ("C", C))
        }

    def _render_question(self, rng: random.Random, task_type: str, A: Point, B: Point, C: Point) -> str:
        """Pick one prompt template for ``task_type`` and fill in the vertex coordinates."""
        template = rng.choice(self._prompt_templates[task_type])
        return template.format(A=(A.x, A.y), B=(B.x, B.y), C=(C.x, C.y))

    def _build_orthocenter_task(self, rng: random.Random, A: Point, B: Point, C: Point):
        """
        Build a question about finding the orthocenter of triangle ABC.

        Returns:
            ``(question, answer, metadata)``; the answer is ``"(x, y)"`` with
            three decimals.
        """
        # Altitudes from A and B: perpendiculars to the opposite sides
        alt_A = sympy.Line(B, C).perpendicular_line(A)
        alt_B = sympy.Line(C, A).perpendicular_line(B)

        # Orthocenter = intersection of any two altitudes
        ortho = alt_A.intersection(alt_B)[0]
        x_ortho_approx = float(ortho.x.evalf())
        y_ortho_approx = float(ortho.y.evalf())

        question = self._render_question(rng, "orthocenter", A, B, C)
        answer_str = f"({x_ortho_approx:.3f}, {y_ortho_approx:.3f})"

        metadata = self._vertex_metadata(A, B, C)
        metadata["orthocenter_exact"] = (str(ortho.x), str(ortho.y))
        metadata["orthocenter_approx"] = (x_ortho_approx, y_ortho_approx)
        return question, answer_str, metadata

    def _build_incircle_radius_task(self, rng: random.Random, A: Point, B: Point, C: Point):
        """
        Build a question about finding the incircle radius of triangle ABC.

        Returns:
            ``(question, answer, metadata)``; the answer is the radius with
            three decimals.
        """
        # Side lengths, each named after the opposite vertex
        a = B.distance(C)
        b = C.distance(A)
        c = A.distance(B)

        # Semi-perimeter
        s = (a + b + c) / 2

        # Area using Heron's formula
        area = sympy.sqrt(s * (s - a) * (s - b) * (s - c))

        # Radius of incircle = Area / Semi-perimeter
        radius = area / s
        radius_approx = float(radius.evalf())

        question = self._render_question(rng, "incircle_radius", A, B, C)
        answer_str = f"{radius_approx:.3f}"

        metadata = self._vertex_metadata(A, B, C)
        metadata["incircle_radius_exact"] = str(radius)
        metadata["incircle_radius_approx"] = radius_approx
        return question, answer_str, metadata

    def _build_angle_measure_task(self, rng: random.Random, A: Point, B: Point, C: Point):
        """
        Build a question about finding the measure of angle ABC in degrees.

        Returns:
            ``(question, answer, metadata)``; the answer is the angle with two
            decimals followed by a degree sign.
        """
        # Angle at B is the angle between vectors BA = A - B and BC = C - B
        BA = A - B
        BC = C - B

        # angle = arccos((BA . BC) / (|BA| * |BC|))
        dot_val = BA.dot(BC)
        origin = Point(0, 0)
        mag_ba = BA.distance(origin)
        mag_bc = BC.distance(origin)

        if mag_ba == 0 or mag_bc == 0:
            # Only possible when B coincides with A or C, i.e. a degenerate input
            angle_deg = 0.0
        else:
            cos_theta = dot_val / (mag_ba * mag_bc)
            # Defensive clamp to the domain of arccos
            cos_theta = max(-1, min(1, cos_theta))
            angle_rad = sympy.acos(cos_theta)
            angle_deg = float(angle_rad.evalf() * 180 / sympy.pi)

        question = self._render_question(rng, "angle_measure", A, B, C)
        answer_str = f"{angle_deg:.2f}°"

        metadata = self._vertex_metadata(A, B, C)
        metadata["angle_ABC_degrees"] = angle_deg
        return question, answer_str, metadata
|
||
|
||
# Register the dataset class and its config class under the name "advanced_geometry"
register_dataset("advanced_geometry", AdvancedGeometryDataset, AdvancedGeometryConfig)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
import random | ||
from dataclasses import dataclass | ||
from typing import Optional | ||
|
||
from ..factory import ProceduralDataset, register_dataset | ||
|
||
@dataclass
class SimpleGeometryConfig:
    """
    Configuration for generating basic geometry (angle-finding) tasks.
    Produces a random convex polygon with N sides, random angles
    for the first (N-1) sides, and asks the solver to find the last angle.
    """

    min_sides: int = 3  # Minimum number of sides (e.g. triangle)
    max_sides: int = 6  # Maximum number of sides (e.g. hexagon)
    min_angle: int = 10  # Minimum angle (in degrees) for each of the first (N-1) angles
    max_angle: int = 170  # Maximum angle (in degrees) for each of the first (N-1) angles
    seed: Optional[int] = None  # Random seed
    size: int = 100  # Number of geometry tasks to generate

    def validate(self) -> None:
        """
        Validate configuration parameters.

        Raises:
            AssertionError: if the side or angle bounds are out of range or
                inverted, the size is not positive, or the smallest allowed
                polygon cannot be generated with the given ``min_angle``.
        """
        assert self.min_sides >= 3, "min_sides must be at least 3 (triangle)."
        assert self.max_sides >= self.min_sides, "max_sides must be >= min_sides."
        assert 0 < self.min_angle < 180, "min_angle must be in (0, 180)."
        assert self.max_angle <= 179, "max_angle should be less than 180."
        assert self.max_angle >= self.min_angle, "max_angle must be >= min_angle."
        assert self.size > 0, "size must be positive."
        # The (N-1) known angles must sum to strictly less than 180*(N-2).
        # The bound per angle, 180*(N-2)/(N-1), grows with N, so checking the
        # smallest polygon covers every allowed side count.
        assert self.min_angle * (self.min_sides - 1) < 180 * (self.min_sides - 2), (
            "min_angle is too large: the known angles of a polygon with "
            "min_sides sides could never leave a positive missing angle."
        )
|
||
|
||
class SimpleGeometryDataset(ProceduralDataset):
    """
    Polygon angle-finding tasks.

    For each item a side count N is drawn from [min_sides, max_sides], then
    (N-1) whole-degree angles are drawn so that their sum stays strictly below
    the interior-angle total 180*(N-2). The question lists those angles and
    the answer is the remainder, 180*(N-2) minus their sum.

    NOTE(review): only positivity of the missing angle is enforced; it can be
    180 degrees or more, although the prompts describe the polygon as convex.
    """

    def __init__(self, config: SimpleGeometryConfig):
        # Alternative phrasings of the same question; one is chosen per item.
        self._prompt_templates = [
            (
                "Given a convex polygon with {n_sides} sides, its first {n_minus_1} interior angles "
                "are: {angle_list}. What is the measure of the remaining interior angle (in degrees)?"
            ),
            (
                "A convex polygon has {n_sides} sides. The measures of "
                "the first {n_minus_1} interior angles are: {angle_list}. "
                "Find the measure of the last interior angle."
            ),
            (
                "Consider a convex {n_sides}-gon whose first {n_minus_1} interior angles "
                "are: {angle_list}. Determine the measure of the remaining angle."
            ),
        ]
        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """
        Build the angle-finding item for index ``idx``.

        Returns:
            A dict with:
                - question: str
                - answer: str (the missing angle in degrees)
                - metadata: dict (n_sides, known_angles, sum_of_known_angles,
                  missing_angle_raw, missing_angle_rounded, total_interior_sum)
        """
        rng = random.Random(self.seed + idx)
        cfg = self.config

        # Side count first, then the angles, then the template: this is the
        # order in which the random stream is consumed.
        n_sides = rng.randint(cfg.min_sides, cfg.max_sides)
        total_sum = 180 * (n_sides - 2)
        known_angles = self._generate_valid_angles(rng, n_sides, total_sum)
        missing_angle = total_sum - sum(known_angles)

        template = rng.choice(self._prompt_templates)
        rendered_angles = ", ".join(f"{a:.1f}°" for a in known_angles)
        prompt = template.format(
            n_sides=n_sides,
            n_minus_1=n_sides - 1,
            angle_list=rendered_angles,
        )

        # One decimal place, collapsed to an int when the value is whole, so
        # the answer text stays clean.
        missing_angle_rounded = round(missing_angle, 1)
        distance_to_whole = abs(missing_angle_rounded - round(missing_angle_rounded))
        if distance_to_whole < 1e-6:
            missing_angle_rounded = int(missing_angle_rounded)

        metadata = {
            "n_sides": n_sides,
            "known_angles": known_angles,
            "sum_of_known_angles": sum(known_angles),
            "missing_angle_raw": missing_angle,
            "missing_angle_rounded": missing_angle_rounded,
            "total_interior_sum": total_sum,
        }
        return {
            "question": prompt,
            "answer": str(missing_angle_rounded),
            "metadata": metadata,
        }

    def _generate_valid_angles(self, rng: random.Random, n_sides: int, total_sum: int):
        """
        Draw (n_sides - 1) angles from [min_angle, max_angle] whose sum is
        strictly below ``total_sum``, retrying with a fresh set on failure.

        Returns:
            The accepted angles as a list of floats.

        Raises:
            ValueError: if no valid set is found within the attempt limit.
        """
        max_attempts = 100
        lowest, highest = self.config.min_angle, self.config.max_angle
        n_known = n_sides - 1

        for _attempt in range(max_attempts):
            candidate = [float(rng.randint(lowest, highest)) for _ in range(n_known)]
            if sum(candidate) < total_sum:
                return candidate

        raise ValueError(
            f"Could not generate valid angles for an {n_sides}-gon "
            f"with total sum {total_sum} within {max_attempts} attempts."
        )
|
||
# Register the dataset class and its config class under the name "simple_geometry"
register_dataset("simple_geometry", SimpleGeometryDataset, SimpleGeometryConfig)
Oops, something went wrong.