Skip to content

Commit

Permalink
Update README
Browse files Browse the repository at this point in the history
Update README
  • Loading branch information
TylerJosephson committed Jan 6, 2025
1 parent 9072ff0 commit b3fe3c0
Show file tree
Hide file tree
Showing 42 changed files with 2,220 additions and 1 deletion.
Binary file added Lean Code/.DS_Store
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/- Chris Lovett is the author of this CSV parser -/
import Lean.Data.Parsec

open Lean Parsec

/-
# The structure of the CSV file
Each line in a CSV file represents a row, also called a "record," and each record
contains multiple entries called "fields," separated by commas.
-/

/-- `Field_CSV` represents a single entry (one cell) in a CSV record.
It is an alias for `String`: field contents are kept as text. -/

abbrev Field_CSV := String

/-- `Record` is an alias for `Array Field_CSV`, representing a single row in a CSV file.
All fields of a row share the element type `Field_CSV`.
NOTE(review): an earlier version of this comment justified `Array` over `List` as being
"fixed-size"; that is not accurate, since arrays in this file are grown with `push`.
Presumably `Array` was chosen for convenient positional (column) access — confirm. -/
abbrev Record := Array Field_CSV

/-- `Csv` is an alias for `Array Record`, representing the entire content of a CSV file:
one `Record` per row, in file order. -/
abbrev Csv := Array Record

/-
## The parser
The functions below establish the grounds for how the parser should interpret different parts of a CSV file.
The parser code processes an input and renders either a structured output or an error.
-/

/-- `textData` matches one character that may appear in a field without any escaping:
a printable ASCII character (0x20–0x7E) other than the double quote `"` (0x22) and the
comma `,` (0x2C). Control characters such as tab, carriage return and line feed fall
outside these ranges and are therefore rejected too. -/
def textData : Parsec Char := satisfy fun c =>
  -- Three code-point ranges joined with `∨`; the gaps between them are exactly `"` and `,`.
  -- (`∧` binds tighter than `∨`, so each line is one closed range.)
  0x20 ≤ c.val ∧ c.val ≤ 0x21 ∨ -- space and '!'
  0x23 ≤ c.val ∧ c.val ≤ 0x2B ∨ -- '#' to '+'
  0x2D ≤ c.val ∧ c.val ≤ 0x7E   -- '-' to '~'

/-- `cr` matches a single carriage return (`\r`), the line ending used by older Mac systems. -/
def cr : Parsec Char :=
  pchar '\r'

/-- `lf` matches a single line feed (`\n`), the line ending used by Unix/Linux and modern
Mac systems. -/
def lf : Parsec Char :=
  pchar '\n'

/-- `crlf` matches the two-character sequence CR+LF (`\r\n`), the line ending used by Windows. -/
def crlf : Parsec String :=
  pstring "\r\n"

/-- `comma` matches the field separator `,` and returns it. -/
def comma : Parsec Char :=
  pchar ','

/-- `dQUOTE` matches a single double-quote character, the delimiter that opens and closes
a quoted (escaped) field. -/
def dQUOTE : Parsec Char :=
  pchar '"'

/-- `twoDQUOTE` matches two consecutive double quotes, the escape sequence for one literal
double quote inside a quoted field, and returns a single quote character.
The pair is wrapped in `attempt`, so a lone quote is not consumed when the second is missing. -/
def twoDQUOTE : Parsec Char := attempt do
  let _ ← pchar '"'
  pchar '"'

/-- `escaped` matches a field surrounded by double quotes and returns its contents without
the surrounding quotes. Inside the quotes it accepts ordinary text, commas, CR, LF, and the
doubled quote, which contributes one quote character to the result.

Function application binds tighter than `*>`, so `attempt` covers only the opening quote;
the parentheses below make that grouping explicit. -/
def escaped : Parsec String :=
  (attempt dQUOTE) *>                                            -- opening quote
    manyChars (textData <|> comma <|> cr <|> lf <|> twoDQUOTE) <* -- field contents
    dQUOTE                                                        -- closing quote

/-- `nonEscaped` parses a field that is not enclosed in double quotes: a run of `textData`
characters, so it stops at the first comma, quote, or line break. -/
def nonEscaped : Parsec String :=
  manyChars textData

/-- `field` parses one CSV field, quoted or not.
It tries `escaped` first and falls back to `nonEscaped` through `<|>`. -/
def field : Parsec Field_CSV :=
  escaped <|> nonEscaped

/--
`manySep p s` parses one `p`, then any number of further `p`s that are each preceded by the
separator `s`, and returns all parsed elements in order as an array.
Each "separator then element" step is wrapped in `attempt`, so a separator that is not
followed by a valid element is not consumed.
For example, the fields of a CSV row are separated by commas.
-/
def manySep (p : Parsec α) (s : Parsec β) : Parsec $ Array α := do
  let head ← p
  manyCore (attempt (s *> p)) #[head]

/-- `record` parses a single CSV row: one or more `field`s separated by `comma`. -/
def record : Parsec Record :=
  manySep field comma

/-- `file` parses an entire CSV document: records separated by CR+LF, optionally followed by
one trailing CR+LF, and then end of input. Rows may have differing numbers of fields. -/
def file : Parsec $ Array Record :=
  -- A CR+LF counts as a row separator only when more input follows it;
  -- a final CR+LF is picked up by `optional crlf` instead.
  let rowBreak := crlf <* notFollowedBy eof
  manySep record rowBreak <* optional crlf <* eof

/-- `parse` runs the `file` parser over the string `s` (the content of a CSV file).
It returns the parsed rows on success, or an error message prefixed with the byte offset
at which parsing failed. -/
def parse (s : String) : Except String $ Array $ Array $ String :=
  match file s.mkIterator with
  | .success _ rows => .ok rows
  | .error pos msg => .error s!"offset {pos.i.byteIdx}: {msg}"

-- e.g., let's parse a CSV string directly.
-- The quoted field keeps its embedded line feed, and the three rows have 2, 2 and 3 fields:
-- `parse` (unlike `parse'`) does not require rows of equal length.

#eval parse ("a,\"b\nc\"\r\n1,2\r\n4,5,6")

/--
`manyHomoCore p acc` repeatedly runs `p`, appending each parsed array to `acc`, and requires
every new array to have the same size as the one most recently accumulated.
The first array is accepted unconditionally when `acc` is empty. A size mismatch fails with
"expect same size"; when `p` itself fails, the rows accumulated so far are returned.
This is used to check that every row of a CSV file has the same number of fields.
-/
partial def manyHomoCore (p : Parsec $ Array α) (acc : Array $ Array α) : Parsec $ Array $ Array α :=
  let step : Parsec $ Array $ Array α := do
    let row ← p
    if acc.size = 0 then
      manyHomoCore p (acc.push row)   -- nothing to compare against yet
    else if acc.back.size = row.size then
      manyHomoCore p (acc.push row)   -- same width as the previous row: keep going
    else
      fail "expect same size"         -- width differs from the previous row
  step <|> pure acc

/--
`manySepHomo p s` parses one array with `p`, then further arrays each preceded by the
separator `s`, delegating to `manyHomoCore` so that all arrays must have the same size.
It is used to parse a CSV file whose rows must all have the same number of fields.
-/
def manySepHomo (p : Parsec $ Array α) (s : Parsec β) : Parsec $ Array $ Array α := do
  let firstRow ← p
  manyHomoCore (attempt (s *> p)) #[firstRow]

/-- `file'` is the strict variant of `file`: the same document grammar (records separated by
CR+LF, optional trailing CR+LF, end of input), but built on `manySepHomo`, so parsing fails
when a row's field count differs from the previous row's. -/
def file' : Parsec $ Array Record :=
  -- A CR+LF counts as a row separator only when more input follows it.
  let rowBreak := crlf <* notFollowedBy eof
  manySepHomo record rowBreak <* optional crlf <* eof

/-- `parse'` runs the `file'` parser over `s`, i.e. it parses CSV text while also checking
that all records have the same number of fields. It returns the rows on success, or an error
message prefixed with the byte offset at which parsing failed. -/
def parse' (s : String) : Except String $ Array $ Array $ String :=
  match file' s.mkIterator with
  | .success _ rows => .ok rows
  | .error pos msg => .error s!"offset {pos.i.byteIdx}: {msg}"

/-This is by John Velkey.-/
/-John's function to handle exceptions -/
/-- Unwraps the result of CSV parsing into a plain table.
On success the parsed rows are returned unchanged; on failure the error text is discarded
and a one-row, one-cell table containing "CSV Parse Error" is returned instead. -/
def CSVParseExceptHandler (inputParse : Except String (Array (Array String))) : Array $ Array $ String :=
  match inputParse with
  | .ok rows => rows
  | .error _ => #[#["CSV Parse Error"]]


/-- Reads the sample CSV file from disk, parses it with `parse`, and prints the resulting
table. A parse failure is printed as the placeholder table from `CSVParseExceptHandler`. -/
def CSVParseIt : IO Unit := do
  let csvPath : System.FilePath :=
    "Lecture 9 - lists, arrays, indexing, matrices/CSV Parser/CSV_From_Excel_Test_no_BOM.csv"
  -- Read the file, parse it, and collapse the `Except` into a printable table.
  let table := CSVParseExceptHandler (parse (← IO.FS.readFile csvPath))
  IO.println table

#eval CSVParseIt

-- Note: the path given to `IO.FS.readFile` in `CSVParseIt` is written as a relative path, so it
-- presumably resolves against the current working directory. Check that directory this way:
#eval IO.currentDir
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Data 1,Data 2,Data 3
1,2,3
1,2,3
1,2,3
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
1.47,1.5,1.52,1.55,1.57,1.6,1.63,1.65,1.68,1.7,1.73,1.75,1.78,1.8,1.83
52.21,53.12,54.48,55.84,57.2,58.57,59.93,61.29,63.11,64.47,66.28,68.1,69.92,72.19,74.46
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
0.000000460,0.000000808,0.000001240,0.000001950,0.000003470,0.000019800,0.000030100,0.000039900,0.000048400,0.000060000,0.000071900,0.000080000,0.000091000,0.000122165,0.000169951,0.000240034,0.000469896,0.001029937,0.002010342,0.003045762,0.003948507,0.004830186,0.005912671,0.006904737,0.007945685,0.00893684,0.009950202,0.019566394,0.02937333,0.039573751,0.053048755,0.059782481,0.070285857,0.080382115,0.090693352,0.100863524,0.145578459,0.201715088,0.255897983,0.306354706,0.355727318,0.400710563,0.450996492,0.500912037,0.550729386,0.600539736,0.650423732,0.699409212,0.749430462,0.799589806,0.810603847,0.820750466,0.830251162,0.840610582,0.850090332,0.860139894,0.870536548,0.880439116,0.890351942,0.900085449,0.909885718,0.920408124,0.930173819,0.940068359,0.949686839,0.959833706,0.969298233,0.979271939,0.989260685,0.998416184
29.24223573,58.6913572,88.1822873,117.7708096,147.0208837,176.6272698,188.1,197.3,205.7335818,219.4,234.899105,247.5,263.9058595,292.8992342,309.4,320.8391657,333.9,346.7523766,351.561349,354.0055815,355.4263289,356.484218,357.5251441,358.3168714,359.0361979,359.633011,360.2001188,363.8561184,366.7213338,369.6,373.5872441,375.5479842,377.6609013,379.2731153,380.7148902,381.9894138,386.4698639,390.404903,393.2731922,395.4325597,396.994765,398.472277,399.9312104,401.2288073,402.3886749,403.4473961,404.4381916,405.3195484,406.161677,406.9765303,407.2760134,407.4391274,407.5884184,407.7410674,407.8706136,408.0103314,408.1640723,408.3117647,408.4410411,408.5758706,408.7267665,408.8781608,409.027047,409.1678296,409.3365257,409.4983328,409.6727584,409.8817061,410.1012315,410.5466624

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
0.13493,0.137476,0.142568,0.145114,0.150205,0.152751,0.155297,0.160388,0.162935,0.165481,0.168026,0.173118,0.175664,0.178209
1154.35,1167.89,1181.45,1190.48,1204.04,1208.57,1222.1,1231.16,1244.69,1253.72,1262.76,1285.32,1294.35,1307.89
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
0.13493,0.137476,0.142568,0.145114,0.150205,0.152751,0.155297,0.160388,0.162935,0.165481,0.168026,0.173118,0.175664,0.178209
0.00013512,0.000136475,0.000140737,0.000142587,0.000146801,0.000149177,0.000150436,0.00015516,0.000156385,0.000158165,0.000159936,0.000162887,0.000164637,0.000165805
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"a,\"b\nc\"\r\n1,2"
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/-- Reads one line from standard input and returns it as returned by `getLine`
(no trimming is done here). -/
def get_input : IO String := do
  (← IO.getStdin).getLine

/-- Factorial of a natural number, written with equation-style pattern matching:
`factorial 0 = 1` and `factorial (n + 1) = (n + 1) * factorial n`. -/
def factorial : Nat → Nat
  | 0 => 1
  | n + 1 => (n + 1) * factorial n

/-- The same factorial function as `factorial`, written with an explicit `match` on the
argument instead of equation-style alternatives. -/
def factorialm (n : Nat) : Nat :=
  match n with
  | 0 => 1
  | n + 1 => (n + 1) * factorialm n

-- The equation-style and `match`-style definitions are stated to be equal as functions,
-- with `rfl` as the proof.
example : factorial = factorialm := by rfl

/-- Prompts for a number, reads a line from standard input, and prints its factorial.
If the trimmed input is not a natural number, an error message is printed instead. -/
def main : IO Unit := do
  IO.println "Enter number:"
  let line ← get_input
  -- `toNat?` yields `none` for anything that is not a natural number.
  let reply :=
    match line.trim.toNat? with
    | some n => s!"Factorial: {factorial n}"
    | none => "Invalid entry. Please enter a valid natural number."
  IO.println reply


-- What's going on with String.toNat? ? This has type Option Nat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/-- Program entry point: prints a greeting to standard output. -/
def main : IO Unit :=
  IO.println "Hello, world!"
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/-- Asks the user how they would like to be addressed, reads one line from standard input,
and greets them by that name with trailing whitespace removed. -/
def main : IO Unit := do
  let console ← IO.getStdin

  IO.println "How would you like to be addressed?"
  let line ← console.getLine
  -- Drop trailing whitespace so the greeting stays on one line.
  IO.println s!"Hello, {line.dropRightWhile Char.isWhitespace}!"
Empty file.
12 changes: 12 additions & 0 deletions Lean Code/Lecture 9 - lists, arrays, indexing, IO/Option.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- Here's how Option works:
/-- Natural-number division that reports division by zero as `none`
instead of producing a number; otherwise returns `some (a / b)`. -/
def safeDivide (a b : Nat) : Option Nat :=
  match b with
  | 0 => none
  | _ => some (a / b)

/-- Demonstrates `safeDivide` by printing the result of one valid division
and one division by zero. -/
def main : IO Unit := do
  IO.println s!"10 / 2 = {safeDivide 10 2}"
  IO.println s!"10 / 0 = {safeDivide 10 0}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/- Strings! -/

-- Define a string with double quotes
def string_example := "hello"
-- Single quotes are used for characters
def char_example := 'c'
-- String to list of characters
#eval string_example.toList
-- List of characters to a joined string
#eval ['h','e','l','l','o'].asString
-- Common string operations: suffix test, trimming, upper-casing
#eval string_example.endsWith "o"
#eval "hello ".trim
#eval string_example.toUpper
-- String concatenation with `++`
#eval string_example ++ " everyone"
-- Passing values into strings (interpolation with `s!`)
#eval s!"{string_example} everyone"
-- Reverse a string (example of chaining string methods)
#eval string_example.toList.reverse.asString
def string_example2 :="12"
-- Number parsing
#eval string_example2.toNat!
#eval string_example2.toInt!

/-- Parses a decimal string of the form `<digits>.<digits>` (exactly one `.`) into a `Float`.

The integer and fractional digits are concatenated into one natural number, which is then
scaled down by the number of fractional digits, e.g. `"12.532"` becomes `12532 × 10⁻³`.

Returns `none` when the string does not contain exactly one `.`, or when the parts around
the `.` do not together form a natural number (e.g. `"a.b"`, `"-1.5"`, `"."`). -/
def float_parsing (str:String) :=
  match str.split (· == '.') with
  | [intPart, fracPart] =>
    -- `toNat?` (not `toNat!`) so malformed digits give `none` instead of a panic.
    (intPart ++ fracPart).toNat?.map fun digits =>
      Float.ofScientific digits true fracPart.length
  | _ => none


-- Splits into "12" and "532", so this evaluates `Float.ofScientific 12532 true 3`.
#eval float_parsing "12.532"
Loading

0 comments on commit b3fe3c0

Please sign in to comment.