Skip to content

Commit

Permalink
Merge pull request #38 from Schmeitzke/main
Browse files: browse the repository at this point in the history
Add Simple and Advanced Geometry Dataset Generators
  • Loading branch information
andreaskoepf authored Feb 1, 2025
2 parents 0f6f58c + 9661a80 commit 4b7fdd7
Show file tree
Hide file tree
Showing 5 changed files with 558 additions and 0 deletions.
9 changes: 9 additions & 0 deletions reasoning_gym/geometry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .simple_geometry import SimpleGeometryConfig, SimpleGeometryDataset
from .advanced_geometry import AdvancedGeometryConfig, AdvancedGeometryDataset

# Public names of the geometry package: the config and dataset classes
# re-exported from the simple_geometry and advanced_geometry modules.
__all__ = [
"SimpleGeometryConfig",
"SimpleGeometryDataset",
"AdvancedGeometryConfig",
"AdvancedGeometryDataset",
]
229 changes: 229 additions & 0 deletions reasoning_gym/geometry/advanced_geometry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
import random
from dataclasses import dataclass
from typing import Optional, List

import sympy
from sympy.geometry import Point, Triangle, Segment

from ..factory import ProceduralDataset, register_dataset


@dataclass
class AdvancedGeometryConfig:
    """
    Configuration for generating advanced geometry tasks.

    Attributes:
        min_coord: Smallest x/y coordinate a triangle vertex may take.
        max_coord: Largest x/y coordinate a triangle vertex may take.
        size: Number of problems to generate.
        seed: Optional base seed handed to the dataset.
        task_types: Task categories to sample from. ``None`` (the default)
            selects every supported category.
    """

    # Un-annotated on purpose: a dataclass only turns annotated names into
    # fields, so this stays a plain class-level constant.
    _SUPPORTED_TASK_TYPES = ("orthocenter", "incircle_radius", "angle_measure")

    min_coord: int = -10  # Minimum x/y coordinate
    max_coord: int = 10  # Maximum x/y coordinate
    size: int = 50  # Number of problems to generate
    seed: Optional[int] = None

    # List of task categories we want to generate; None means "all of them".
    task_types: Optional[List[str]] = None

    def __post_init__(self):
        """Fill in the default task list when none was supplied."""
        if self.task_types is None:
            # A fresh list per instance, so configs never share mutable state.
            self.task_types = list(self._SUPPORTED_TASK_TYPES)

    def validate(self):
        """
        Check that the configuration is usable.

        Raises:
            AssertionError: If the coordinate range is empty or inverted, the
                size is not positive, no task type is given, or a task type is
                not one of the supported categories.
        """
        assert self.min_coord < self.max_coord, "min_coord must be < max_coord."
        assert self.size > 0, "Size of dataset must be positive."
        assert len(self.task_types) > 0, "Must specify at least one task type."

        # Reject unknown names up front instead of failing later, and only
        # sporadically, when the dataset happens to draw the bad entry.
        unknown = [t for t in self.task_types if t not in self._SUPPORTED_TASK_TYPES]
        assert not unknown, f"Unknown task types: {unknown}"


class AdvancedGeometryDataset(ProceduralDataset):
    """
    A dataset for advanced geometry tasks using coordinate geometry.

    Every item is built from a random non-degenerate triangle ABC with integer
    vertex coordinates and asks for one of three quantities: the orthocenter,
    the incircle radius, or the angle at vertex B in degrees.
    """

    def __init__(self, config: AdvancedGeometryConfig):
        # The templates are assigned before the base initializer runs, which
        # keeps the original initialization order.
        self._prompt_templates = {
            "orthocenter": [
                "Given triangle ABC with coordinates A={A}, B={B}, and C={C}, find the coordinates of its orthocenter.",
                "For triangle with vertices A={A}, B={B}, and C={C}, determine the orthocenter (intersection of altitudes).",
            ],
            "incircle_radius": [
                "Consider triangle ABC with coordinates A={A}, B={B}, and C={C}. Compute the radius of its incircle.",
                "Find the incircle radius of triangle ABC whose vertices are A={A}, B={B}, and C={C}.",
            ],
            "angle_measure": [
                "In triangle ABC with coordinates A={A}, B={B}, and C={C}, find the measure (in degrees) of angle ABC.",
                "Given a triangle with vertices A={A}, B={B}, C={C}, determine the angle at B in degrees.",
            ],
        }
        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """
        Generate a single advanced geometry item based on the config's task types.

        All randomness comes from a private generator seeded with
        ``self.seed + idx``, so for a fixed seed the same index always produces
        the same item.

        Returns:
            A dict with the keys ``question``, ``answer`` and ``metadata``.

        Raises:
            ValueError: If the drawn task type is not supported, or if no
                non-degenerate triangle could be generated.
        """
        rng = random.Random(self.seed + idx)
        task_type = rng.choice(self.config.task_types)

        builders = {
            "orthocenter": self._build_orthocenter_task,
            "incircle_radius": self._build_incircle_radius_task,
            "angle_measure": self._build_angle_measure_task,
        }
        builder = builders.get(task_type)
        if builder is None:
            # Fail before doing any geometry work for an unsupported task.
            raise ValueError(f"Unknown task_type: {task_type}")

        # Randomly generate coordinates for a triangle
        A, B, C = self._generate_non_degenerate_triangle(rng)

        # Build a question and compute the solution
        question, answer, metadata = builder(rng, A, B, C)

        return {
            "question": question,
            "answer": answer,
            "metadata": metadata,
        }

    def _generate_non_degenerate_triangle(self, rng: random.Random):
        """
        Generate a random non-degenerate triangle with integer coordinates
        in [min_coord, max_coord] x [min_coord, max_coord].

        Returns:
            A tuple ``(A, B, C)`` of sympy ``Point`` objects.

        Raises:
            ValueError: If every attempt produced three collinear points.
        """
        low, high = self.config.min_coord, self.config.max_coord
        max_attempts = 100
        for _ in range(max_attempts):
            # Arguments are evaluated left to right, so x is drawn before y
            # for each vertex.
            A, B, C = (Point(rng.randint(low, high), rng.randint(low, high)) for _ in range(3))

            # Shoelace formula: this sum is twice the signed area, which is
            # zero exactly when the three points are collinear.
            doubled_area = A.x * (B.y - C.y) + B.x * (C.y - A.y) + C.x * (A.y - B.y)
            if doubled_area != 0:
                return A, B, C

        raise ValueError(f"Failed to generate a non-degenerate triangle after {max_attempts} attempts.")

    @staticmethod
    def _plain_number(value):
        """Convert a sympy number to a built-in int (when integral) or float."""
        return int(value) if value.is_integer else float(value)

    def _vertex_metadata(self, A: Point, B: Point, C: Point) -> dict:
        """
        Return the vertex coordinates as tuples of built-in numbers.

        Raw sympy numbers are not JSON-serializable, so they are converted
        before being stored in an item's metadata.
        """
        return {
            name: (self._plain_number(point.x), self._plain_number(point.y))
            for name, point in (("A", A), ("B", B), ("C", C))
        }

    def _format_question(self, rng: random.Random, task_type: str, A: Point, B: Point, C: Point) -> str:
        """Pick one of the task's prompt templates and fill in the vertex coordinates."""
        template = rng.choice(self._prompt_templates[task_type])
        return template.format(A=(A.x, A.y), B=(B.x, B.y), C=(C.x, C.y))

    def _build_orthocenter_task(self, rng: random.Random, A: Point, B: Point, C: Point):
        """
        Build a question about finding the orthocenter of triangle ABC.

        Returns:
            ``(question, answer, metadata)`` where the answer is formatted as
            ``"(x, y)"`` with three decimal places.
        """
        # Altitudes from A and B: lines through the vertex perpendicular to
        # the opposite side.
        alt_A = sympy.Line(B, C).perpendicular_line(A)
        alt_B = sympy.Line(C, A).perpendicular_line(B)

        # Any two altitudes meet at the orthocenter.
        ortho = alt_A.intersection(alt_B)[0]

        x_ortho_approx = float(ortho.x.evalf())
        y_ortho_approx = float(ortho.y.evalf())

        question = self._format_question(rng, "orthocenter", A, B, C)
        answer_str = f"({x_ortho_approx:.3f}, {y_ortho_approx:.3f})"

        metadata = self._vertex_metadata(A, B, C)
        metadata["orthocenter_exact"] = (str(ortho.x), str(ortho.y))
        metadata["orthocenter_approx"] = (x_ortho_approx, y_ortho_approx)
        return question, answer_str, metadata

    def _build_incircle_radius_task(self, rng: random.Random, A: Point, B: Point, C: Point):
        """
        Build a question about finding the incircle radius of triangle ABC.

        Returns:
            ``(question, answer, metadata)`` where the answer is the radius
            formatted with three decimal places.
        """
        # Side lengths, each named after the opposite vertex
        a = B.distance(C)
        b = C.distance(A)
        c = A.distance(B)

        # Semi-perimeter
        s = (a + b + c) / 2

        # Area using Heron's formula
        area = sympy.sqrt(s * (s - a) * (s - b) * (s - c))

        # Radius of incircle = Area / Semi-perimeter
        radius = area / s
        radius_approx = float(radius.evalf())

        question = self._format_question(rng, "incircle_radius", A, B, C)
        answer_str = f"{radius_approx:.3f}"

        metadata = self._vertex_metadata(A, B, C)
        metadata["incircle_radius_exact"] = str(radius)
        metadata["incircle_radius_approx"] = radius_approx
        return question, answer_str, metadata

    def _build_angle_measure_task(self, rng: random.Random, A: Point, B: Point, C: Point):
        """
        Build a question about finding the measure of angle ABC in degrees.

        Returns:
            ``(question, answer, metadata)`` where the answer is the angle
            formatted with two decimal places followed by a degree sign.
        """
        # Angle at B is the angle between the vectors BA = A - B and BC = C - B.
        BA = A - B
        BC = C - B
        mag_ba = B.distance(A)
        mag_bc = B.distance(C)

        if mag_ba == 0 or mag_bc == 0:
            # Only reachable when B coincides with A or C, which a
            # non-degenerate triangle rules out; kept as a guard for direct calls.
            angle_deg = 0.0
        else:
            # angle = arccos((BA . BC) / (|BA| * |BC|))
            cos_theta = BA.dot(BC) / (mag_ba * mag_bc)
            # Clamp to [-1, 1] so acos always receives a value in its domain.
            cos_theta = max(-1, min(1, cos_theta))
            angle_rad = sympy.acos(cos_theta)
            angle_deg = float(angle_rad.evalf() * 180 / sympy.pi)

        question = self._format_question(rng, "angle_measure", A, B, C)
        answer_str = f"{angle_deg:.2f}°"

        metadata = self._vertex_metadata(A, B, C)
        metadata["angle_ABC_degrees"] = angle_deg
        return question, answer_str, metadata


# Register the dataset class together with its config class under the name
# "advanced_geometry", using register_dataset imported from ..factory.
register_dataset("advanced_geometry", AdvancedGeometryDataset, AdvancedGeometryConfig)
140 changes: 140 additions & 0 deletions reasoning_gym/geometry/simple_geometry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import random
from dataclasses import dataclass
from typing import Optional

from ..factory import ProceduralDataset, register_dataset

@dataclass
class SimpleGeometryConfig:
    """
    Configuration for generating basic geometry (angle-finding) tasks.

    Each task describes a convex polygon with N sides, lists (N-1) of its
    interior angles, and asks the solver to find the remaining one.
    """

    min_sides: int = 3  # Minimum number of sides (e.g. triangle)
    max_sides: int = 6  # Maximum number of sides (e.g. hexagon)
    min_angle: int = 10  # Minimum angle (in degrees) for each of the first (N-1) angles
    max_angle: int = 170  # Maximum angle (in degrees) for each of the first (N-1) angles
    seed: Optional[int] = None  # Random seed
    size: int = 100  # Number of geometry tasks to generate

    def validate(self) -> None:
        """
        Validate configuration parameters.

        Raises:
            AssertionError: If the side range, the angle range, or the dataset
                size is invalid.
        """
        assert self.min_sides >= 3, "min_sides must be at least 3 (triangle)."
        assert self.max_sides >= self.min_sides, "max_sides must be >= min_sides."
        assert 0 < self.min_angle < 180, "min_angle must be in (0, 180)."
        assert self.max_angle <= 179, "max_angle should be less than 180."
        assert self.max_angle >= self.min_angle, "max_angle must be >= min_angle."
        # Same size check as AdvancedGeometryConfig.validate().
        assert self.size > 0, "Size of dataset must be positive."


class SimpleGeometryDataset(ProceduralDataset):
    """
    A dataset for simple polygon angle-finding tasks.

    We randomly choose the number of sides N within [min_sides, max_sides].
    We then generate (N-1) random whole-degree angles such that the remaining
    angle of an N-sided convex polygon (interior sum 180*(N-2)) is itself a
    valid convex interior angle, i.e. strictly between 0 and 180 degrees.
    The question asks for the missing angle; the answer is computed by
    subtracting the sum of known angles from 180*(N-2).
    """

    def __init__(self, config: SimpleGeometryConfig):
        # The templates are assigned before the base initializer runs, which
        # keeps the original initialization order.
        self._prompt_templates = [
            (
                "Given a convex polygon with {n_sides} sides, its first {n_minus_1} interior angles "
                "are: {angle_list}. What is the measure of the remaining interior angle (in degrees)?"
            ),
            (
                "A convex polygon has {n_sides} sides. The measures of "
                "the first {n_minus_1} interior angles are: {angle_list}. "
                "Find the measure of the last interior angle."
            ),
            (
                "Consider a convex {n_sides}-gon whose first {n_minus_1} interior angles "
                "are: {angle_list}. Determine the measure of the remaining angle."
            ),
        ]
        super().__init__(config=config, seed=config.seed, size=config.size)

    def __getitem__(self, idx: int) -> dict:
        """
        Generate a single geometry angle-finding item.

        All randomness comes from a private generator seeded with
        ``self.seed + idx``, so for a fixed seed the same index always produces
        the same item.

        Returns:
            A dict with:
            - question: str
            - answer: str (the missing angle, as an integer or float in degrees)
            - metadata: dict (n_sides, angles, sum_of_known, missing_angle, etc.)

        Raises:
            ValueError: If no valid set of known angles could be generated.
        """
        rng = random.Random(self.seed + idx)

        # Randomly pick the number of sides
        n_sides = rng.randint(self.config.min_sides, self.config.max_sides)

        # Total interior angle sum for a convex n_sides-gon
        total_sum = 180 * (n_sides - 2)

        # Generate the (n_sides - 1) known angles and derive the missing one
        known_angles = self._generate_valid_angles(rng, n_sides, total_sum)
        known_sum = sum(known_angles)
        missing_angle = total_sum - known_sum

        # Build the question string
        angle_list_str = ", ".join(f"{a:.1f}°" for a in known_angles)
        prompt = rng.choice(self._prompt_templates).format(
            n_sides=n_sides,
            n_minus_1=n_sides - 1,
            angle_list=angle_list_str,
        )

        # Round to one decimal place, collapsing to an int when the value is
        # effectively whole, so the answer string stays clean ("120", not "120.0").
        missing_angle_rounded = round(missing_angle, 1)
        if abs(missing_angle_rounded - round(missing_angle_rounded)) < 1e-6:
            missing_angle_rounded = int(missing_angle_rounded)

        return {
            "question": prompt,
            "answer": str(missing_angle_rounded),
            "metadata": {
                "n_sides": n_sides,
                "known_angles": known_angles,
                "sum_of_known_angles": known_sum,
                "missing_angle_raw": missing_angle,
                "missing_angle_rounded": missing_angle_rounded,
                "total_interior_sum": total_sum,
            },
        }

    def _generate_valid_angles(self, rng: random.Random, n_sides: int, total_sum: int):
        """
        Generate (n_sides - 1) random angles in [min_angle, max_angle] whose
        sum leaves a missing angle strictly between 0 and 180 degrees.

        Checking only that the sum stays below total_sum is not enough: a
        quadrilateral with known angles 10, 10, 10 would leave 330 degrees,
        which cannot be an interior angle of a convex polygon.

        We keep retrying until we find a valid set or reach a max attempt limit.

        Returns:
            A list of (n_sides - 1) floats holding whole-degree values.

        Raises:
            ValueError: If no valid set was found within the attempt limit.
        """
        low, high = self.config.min_angle, self.config.max_angle
        max_attempts = 100
        for _ in range(max_attempts):
            angles = [float(rng.randint(low, high)) for _ in range(n_sides - 1)]

            # Every interior angle of a convex polygon lies in (0, 180),
            # including the one that is left for the solver to find.
            if 0 < total_sum - sum(angles) < 180:
                return angles

        # If we fail after max_attempts, raise an error
        raise ValueError(
            f"Could not generate valid angles for an {n_sides}-gon "
            f"with total sum {total_sum} within {max_attempts} attempts."
        )

# Register the dataset class together with its config class under the name
# "simple_geometry", using register_dataset imported from ..factory.
register_dataset("simple_geometry", SimpleGeometryDataset, SimpleGeometryConfig)
Loading

0 comments on commit 4b7fdd7

Please sign in to comment.