-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
42 changed files
with
2,220 additions
and
1 deletion.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
Binary file not shown.
158 changes: 158 additions & 0 deletions
158
Lean Code/Lecture 9 - lists, arrays, indexing, IO/CSV Parser/CSVCat.lean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
/- Chris Lovett is the author of this CSV parser -/ | ||
import Lean.Data.Parsec | ||
|
||
open Lean Parsec | ||
|
||
/- | ||
# The structure of the CSV file | ||
Each line in a CSV file represents a row, also called a "record," and each record | ||
contains multiple entries called "fields," separated by commas. | ||
-/ | ||
|
||
/-- `Field_CSV` represents a single entry (one cell) in a CSV record. | ||
It is an `abbrev` for `String`, so a `Field_CSV` can be used anywhere a `String` is expected. | ||
-/ | ||
|
||
abbrev Field_CSV := String | ||
|
||
/-- `Record` is an alias for `Array Field_CSV`, representing a single row in a CSV file. | ||
All elements of a record have the same type (`Field_CSV`). | ||
NOTE(review): an earlier wording called `Array` "fixed-size"; the parsers in this file grow | ||
arrays with `push`, so that claim was dropped. -/ | ||
abbrev Record := Array Field_CSV | ||
|
||
/-- `Csv` is an alias for `Array Record`, representing the entire content of a CSV file | ||
(one `Record` per row). -/ | ||
abbrev Csv := Array Record | ||
|
||
/- | ||
## The parser | ||
The functions below establish the grounds for how the parser should interpret different parts of a CSV file. | ||
The parser code processes an input and renders either a structured output or an error. | ||
-/ | ||
|
||
/-- `textData` matches one character that may appear in an unquoted field: | ||
a printable ASCII character (`0x20`–`0x7E`) other than the double quote (`0x22`) | ||
and the comma (`0x2C`). | ||
Control characters (newline, tab, ...) and every character above `0x7E` are rejected. -/ | ||
def textData : Parsec Char := satisfy fun c => | ||
-- Compare the character's code point against the three allowed ranges. | ||
0x20 ≤ c.val ∧ c.val ≤ 0x21 ∨ -- Space to '!' (stops just before the double quote) | ||
0x23 ≤ c.val ∧ c.val ≤ 0x2B ∨ -- '#' to '+' (stops just before the comma) | ||
0x2D ≤ c.val ∧ c.val ≤ 0x7E -- '-' to '~' | ||
|
||
/-- `cr` matches a carriage return (`'\r'`). | ||
Together with `lf` and `crlf` it covers the newline sequences used by different operating systems. -/ | ||
def cr : Parsec Char := pchar '\r' -- Carriage Return (used in older Mac systems) | ||
/-- `lf` matches a line feed (`'\n'`). -/ | ||
def lf : Parsec Char := pchar '\n' -- Line Feed (used in Unix/Linux and modern Mac systems) | ||
/-- `crlf` matches the two-character sequence `"\r\n"`. -/ | ||
def crlf : Parsec String := pstring "\r\n" -- CR+LF combination (used in Windows) | ||
|
||
/-- `comma` matches a single comma `,`, the separator between fields of a record. -/ | ||
def comma : Parsec Char := pchar ',' | ||
|
||
/-- `dQUOTE` matches a double quote character, which opens and closes a quoted (escaped) field. -/ | ||
def dQUOTE : Parsec Char := pchar '\"' | ||
|
||
/-- `twoDQUOTE` matches two consecutive double quotes `""`, which represent an escaped double quote | ||
within a quoted field, and returns a single double quote character. | ||
The pair is wrapped in `attempt` so that a lone quote is not consumed when the second one is missing. -/ | ||
def twoDQUOTE : Parsec Char := attempt (pchar '"' *> pchar '"') | ||
|
||
/-- `escaped` matches a field surrounded by double quotes and returns the text between them. | ||
Inside the quotes, commas, carriage returns, line feeds and doubled quotes (`""`, yielding one quote) | ||
are allowed in addition to ordinary `textData` characters. | ||
NOTE(review): as written, `attempt` appears to apply to the opening `dQUOTE` only (function | ||
application binds tighter than `*>`), so a field whose closing quote is missing fails without | ||
rewinding to the opening quote. Confirm this is intended. -/ | ||
def escaped : Parsec String := attempt | ||
dQUOTE *> -- Start with an opening double quote | ||
manyChars (textData <|> comma <|> cr <|> lf <|> twoDQUOTE) -- Allow special characters | ||
<* dQUOTE -- End with a closing double quote | ||
|
||
/-- `nonEscaped` is for fields that are not enclosed in double quotes. | ||
It matches zero or more `textData` characters, so it stops at a comma, a double quote or a newline | ||
and succeeds with an empty string when the field is empty. -/ | ||
def nonEscaped: Parsec String := manyChars textData | ||
|
||
/-- `field` is a parser that can handle both escaped and non-escaped fields. | ||
It tries the `escaped` parser first and, if that fails, falls back to `nonEscaped`. -/ | ||
def field : Parsec Field_CSV := escaped <|> nonEscaped | ||
|
||
/-- | ||
`manySep p s` parses one or more occurrences of `p`, with the separator `s` between | ||
consecutive occurrences, and collects the results of `p` in an array. | ||
In a CSV record, for instance, `p` is a field and `s` is a comma. | ||
-/ | ||
def manySep (p : Parsec α) (s : Parsec β) : Parsec $ Array α := do | ||
  -- The first element is mandatory; every later one must be preceded by a separator. | ||
  let head ← p | ||
  manyCore (attempt (s *> p)) #[head] | ||
|
||
/-- `record` parses a single row of CSV: one or more fields separated by commas. | ||
Because a field may be empty, an empty line parses as a record holding one empty field. -/ | ||
def record : Parsec Record := manySep field comma | ||
|
||
/-- `file` parses the entire CSV file: one or more records separated by CRLF (`"\r\n"`), | ||
optionally followed by one trailing CRLF, and then the end of the input. | ||
The `notFollowedBy eof` guard stops a trailing CRLF from being taken as a record separator. | ||
NOTE(review): only `crlf` is accepted between records, so input that uses bare `\n` line endings | ||
will fail to parse. Confirm this is intended. -/ | ||
def file : Parsec $ Array Record := | ||
manySep record (crlf <* notFollowedBy eof) <* (optional crlf) <* eof | ||
|
||
/-- `parse` runs the `file` parser over the string `s` (the content of a CSV file). | ||
It returns `Except.ok` with the parsed rows, or `Except.error` with a message of the form | ||
`offset <byte index>: <parser error>`. -/ | ||
def parse (s : String) : Except String $ Array $ Array $ String := | ||
  match file s.mkIterator with | ||
  | .error it err => .error s!"offset {it.i.byteIdx}: {err}" -- report where the parser stopped | ||
  | .success _ rows => .ok rows -- the remaining iterator is not needed | ||
|
||
-- e.g., let's parse a CSV string directly: three CRLF-separated rows, the first one containing a quoted field. | ||
|
||
#eval parse ("a,\"b\nc\"\r\n1,2\r\n4,5,6") | ||
|
||
/-- | ||
`manyHomoCore p acc` repeatedly runs `p`, appending each parsed array to `acc`, and checks | ||
that every newly parsed array has the same size as the one appended before it. | ||
This is useful for validating that every row in a CSV file has the same number of fields, | ||
which is often a requirement for properly formatted CSV files. | ||
The definition is `partial`, so no termination proof is required for the recursive calls. | ||
-/ | ||
partial def manyHomoCore (p : Parsec $ Array α) (acc : Array $ Array α) : Parsec $ Array $ Array α := | ||
(do | ||
let first ← p | ||
if acc.size = 0 then | ||
manyHomoCore p (acc.push first) -- If it's the first element, it just adds it | ||
else | ||
-- Comparing with the most recent row suffices: earlier rows were checked when they were added. | ||
if acc.back.size = first.size then | ||
manyHomoCore p (acc.push first) -- If the sizes match, parsing continues | ||
else | ||
fail "expect same size" -- If sizes don't match, error thrown | ||
) | ||
<|> pure acc -- If `p` fails, return the rows accumulated so far | ||
|
||
/-- | ||
`manySepHomo p s` parses one or more arrays with `p`, separated by `s`, delegating to | ||
`manyHomoCore` the check that consecutive arrays have the same size. | ||
It is used to parse a CSV file while making sure that all rows have the same number of fields. | ||
-/ | ||
def manySepHomo (p : Parsec $ Array α) (s : Parsec β) : Parsec $ Array $ Array α := do | ||
  -- The first array is mandatory and seeds the accumulator used for the size checks. | ||
  let firstRow ← p | ||
  manyHomoCore (attempt (s *> p)) #[firstRow] | ||
|
||
/-- `file'` is an alternative to `file` that uses `manySepHomo` instead of `manySep`, | ||
so each row is checked to have the same number of fields as the row before it. -/ | ||
def file' : Parsec $ Array Record := manySepHomo record (crlf <* notFollowedBy eof) <* (optional crlf) <* eof | ||
|
||
/-- `parse'` runs the `file'` parser over the string `s`, i.e. it parses CSV content with the | ||
additional check that all records have the same number of fields. | ||
It returns `Except.ok` with the parsed rows, or `Except.error` with a message of the form | ||
`offset <byte index>: <parser error>`. -/ | ||
def parse' (s : String) : Except String $ Array $ Array $ String := | ||
  match file' s.mkIterator with | ||
  | .error it err => .error s!"offset {it.i.byteIdx}: {err}" -- report where the parser stopped | ||
  | .success _ rows => .ok rows -- the remaining iterator is not needed | ||
|
||
/-This is by John Velkey.-/ | ||
/-John's function to handle exceptions -/ | ||
/-- Unwraps the result of a CSV parse. | ||
On success the parsed rows are returned unchanged; on failure the error message is discarded | ||
and a one-cell table containing "CSV Parse Error" is returned instead. -/ | ||
def CSVParseExceptHandler (inputParse : Except String (Array (Array String))) : Array $ Array $ String := | ||
  match inputParse with | ||
  | .error _ => #[#["CSV Parse Error"]] -- the specific parser message is dropped | ||
  | .ok rows => rows -- pass the parsed table through | ||
|
||
|
||
/-- Reads the sample CSV file, parses it with `parse`, and prints the resulting table | ||
(or the placeholder table produced by `CSVParseExceptHandler` when parsing fails). | ||
The file path is relative to the current working directory. -/ | ||
def CSVParseIt : IO Unit := do | ||
  -- Read the CSV file. The directory name matches the folder that holds this source file | ||
  -- and the sample CSV ("Lecture 9 - lists, arrays, indexing, IO"). | ||
  let fileContent ← IO.FS.readFile "Lecture 9 - lists, arrays, indexing, IO/CSV Parser/CSV_From_Excel_Test_no_BOM.csv" | ||
|
||
  -- Parse the file using the parse function | ||
  let parsedCSV := parse fileContent | ||
|
||
  -- Handle the result using the CSVParseExceptHandler function | ||
  let result := CSVParseExceptHandler parsedCSV | ||
|
||
  IO.println result | ||
|
||
#eval CSVParseIt | ||
|
||
-- Note: the file path above is relative to the current working directory, which can be checked this way: | ||
#eval IO.currentDir |
4 changes: 4 additions & 0 deletions
4
Lean Code/Lecture 9 - lists, arrays, indexing, IO/CSV Parser/CSV_From_Excel_Test_no_BOM.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
Data 1,Data 2,Data 3 | ||
1,2,3 | ||
1,2,3 | ||
1,2,3 |
2 changes: 2 additions & 0 deletions
2
Lean Code/Lecture 9 - lists, arrays, indexing, IO/CSV Parser/CSV_LinReg_From_Doc_Test.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
1.47,1.5,1.52,1.55,1.57,1.6,1.63,1.65,1.68,1.7,1.73,1.75,1.78,1.8,1.83 | ||
52.21,53.12,54.48,55.84,57.2,58.57,59.93,61.29,63.11,64.47,66.28,68.1,69.92,72.19,74.46 |
3 changes: 3 additions & 0 deletions
3
Lean Code/Lecture 9 - lists, arrays, indexing, IO/CSV Parser/HKUST-1_tposed.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
0.000000460,0.000000808,0.000001240,0.000001950,0.000003470,0.000019800,0.000030100,0.000039900,0.000048400,0.000060000,0.000071900,0.000080000,0.000091000,0.000122165,0.000169951,0.000240034,0.000469896,0.001029937,0.002010342,0.003045762,0.003948507,0.004830186,0.005912671,0.006904737,0.007945685,0.00893684,0.009950202,0.019566394,0.02937333,0.039573751,0.053048755,0.059782481,0.070285857,0.080382115,0.090693352,0.100863524,0.145578459,0.201715088,0.255897983,0.306354706,0.355727318,0.400710563,0.450996492,0.500912037,0.550729386,0.600539736,0.650423732,0.699409212,0.749430462,0.799589806,0.810603847,0.820750466,0.830251162,0.840610582,0.850090332,0.860139894,0.870536548,0.880439116,0.890351942,0.900085449,0.909885718,0.920408124,0.930173819,0.940068359,0.949686839,0.959833706,0.969298233,0.979271939,0.989260685,0.998416184 | ||
29.24223573,58.6913572,88.1822873,117.7708096,147.0208837,176.6272698,188.1,197.3,205.7335818,219.4,234.899105,247.5,263.9058595,292.8992342,309.4,320.8391657,333.9,346.7523766,351.561349,354.0055815,355.4263289,356.484218,357.5251441,358.3168714,359.0361979,359.633011,360.2001188,363.8561184,366.7213338,369.6,373.5872441,375.5479842,377.6609013,379.2731153,380.7148902,381.9894138,386.4698639,390.404903,393.2731922,395.4325597,396.994765,398.472277,399.9312104,401.2288073,402.3886749,403.4473961,404.4381916,405.3195484,406.161677,406.9765303,407.2760134,407.4391274,407.5884184,407.7410674,407.8706136,408.0103314,408.1640723,408.3117647,408.4410411,408.5758706,408.7267665,408.8781608,409.027047,409.1678296,409.3365257,409.4983328,409.6727584,409.8817061,410.1012315,410.5466624 | ||
|
2 changes: 2 additions & 0 deletions
2
Lean Code/Lecture 9 - lists, arrays, indexing, IO/CSV Parser/NU-1104.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
0.13493,0.137476,0.142568,0.145114,0.150205,0.152751,0.155297,0.160388,0.162935,0.165481,0.168026,0.173118,0.175664,0.178209 | ||
1154.35,1167.89,1181.45,1190.48,1204.04,1208.57,1222.1,1231.16,1244.69,1253.72,1262.76,1285.32,1294.35,1307.89 |
2 changes: 2 additions & 0 deletions
2
Lean Code/Lecture 9 - lists, arrays, indexing, IO/CSV Parser/NU-1104_For_LRM.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
0.13493,0.137476,0.142568,0.145114,0.150205,0.152751,0.155297,0.160388,0.162935,0.165481,0.168026,0.173118,0.175664,0.178209 | ||
0.00013512,0.000136475,0.000140737,0.000142587,0.000146801,0.000149177,0.000150436,0.00015516,0.000156385,0.000158165,0.000159936,0.000162887,0.000164637,0.000165805 |
1 change: 1 addition & 0 deletions
1
Lean Code/Lecture 9 - lists, arrays, indexing, IO/CSV Parser/csv_test.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"a,\"b\nc\"\r\n1,2" |
26 changes: 26 additions & 0 deletions
26
Lean Code/Lecture 9 - lists, arrays, indexing, IO/GetFactorial.lean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
/-- Reads one line from standard input and returns it as produced by `getLine`. -/ | ||
def get_input : IO String := do | ||
  (← IO.getStdin).getLine | ||
|
||
/-- `factorial n` computes `n!` by recursion on `n`, written with equation-style pattern matching. -/ | ||
def factorial : Nat → Nat | ||
| 0 => 1 | ||
| n + 1 => (n + 1) * factorial n | ||
|
||
/-- `factorialm n` computes `n!` like `factorial`, but written with an explicit `match` on a named argument. -/ | ||
def factorialm (n : Nat) : Nat := | ||
match n with | ||
| 0 => 1 | ||
| n + 1 => (n + 1) * factorialm n | ||
|
||
-- The two spellings define the same function; `rfl` is asked to confirm it. | ||
example : factorial = factorialm := by rfl | ||
|
||
/-- Prompts for a number, reads one line from standard input and prints its factorial; | ||
prints an error message when the trimmed input is not a natural number. -/ | ||
def main : IO Unit := do | ||
  IO.println "Enter number:" | ||
  let line ← get_input | ||
  -- `String.toNat?` has type `Option Nat`: `none` means the text was not a natural number. | ||
  let reply := | ||
    match line.trim.toNat? with | ||
    | none => "Invalid entry. Please enter a valid natural number." | ||
    | some n => | ||
      let fact := factorial n | ||
      s!"Factorial: {fact}" | ||
  IO.println reply | ||
|
||
|
||
-- What's going on with String.toNat? ? This has type Option Nat |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/-- Program entry point: prints a greeting to standard output. -/ | ||
def main : IO Unit := IO.println "Hello, world!" |
7 changes: 7 additions & 0 deletions
7
Lean Code/Lecture 9 - lists, arrays, indexing, IO/HelloName.lean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/-- Asks the user how they would like to be addressed and greets them by that name. | ||
Trailing whitespace is removed from the line that was read before it is used. -/ | ||
def main : IO Unit := do | ||
  IO.println "How would you like to be addressed?" | ||
  let name := (← (← IO.getStdin).getLine).dropRightWhile Char.isWhitespace | ||
  IO.println s!"Hello, {name}!" |
Empty file.
12 changes: 12 additions & 0 deletions
12
Lean Code/Lecture 9 - lists, arrays, indexing, IO/Option.lean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
-- Here's how Option works: | ||
/-- `safeDivide a b` returns `some (a / b)`, or `none` when the divisor `b` is zero. -/ | ||
def safeDivide (a b : Nat) : Option Nat := | ||
  match b with | ||
  | 0 => none | ||
  | _ => some (a / b) | ||
|
||
/-- Demonstrates `safeDivide` by printing one successful division and one division by zero. -/ | ||
def main : IO Unit := do | ||
  let (result1, result2) := (safeDivide 10 2, safeDivide 10 0) | ||
  IO.println s!"10 / 2 = {result1}" | ||
  IO.println s!"10 / 0 = {result2}" |
36 changes: 36 additions & 0 deletions
36
Lean Code/Lecture 9 - lists, arrays, indexing, IO/StringExamples.lean
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/- Strings! -/ | ||
|
||
-- Define a string with double quotes | ||
def string_example := "hello" | ||
-- Single quotes are used for characters | ||
def char_example := 'c' | ||
-- string to list of characters | ||
#eval string_example.toList | ||
-- list of characters to joined string | ||
#eval ['h','e','l','l','o'].asString | ||
-- suffix test, trimming surrounding whitespace, and upper-casing | ||
#eval string_example.endsWith "o" | ||
#eval "hello ".trim | ||
#eval string_example.toUpper | ||
-- string concatenation | ||
#eval string_example ++ " everyone" | ||
-- passing values into strings (string interpolation) | ||
#eval s!"{string_example} everyone" | ||
-- Reverse a string (example of chaining string methods) | ||
#eval string_example.toList.reverse.asString | ||
def string_example2 :="12" | ||
-- number parsing (the `!` variants panic when the text is not a number) | ||
#eval string_example2.toNat! | ||
#eval string_example2.toInt! | ||
|
||
/-- Parses a non-negative decimal literal of the form `<digits>.<digits>` into a `Float`. | ||
Returns `none` when `str` does not contain exactly one `'.'`, or when the text around it is not | ||
accepted by `String.toNat?`, so malformed input is reported as `none` rather than causing a panic. -/ | ||
def float_parsing (str : String) : Option Float := | ||
  match str.split (· == '.') with | ||
  | [intPart, fracPart] => | ||
    -- Read all digits as one natural number, then shift the decimal point left by the | ||
    -- number of fractional digits: "12.532" becomes 12532 scaled by 10^(-3). | ||
    (intPart ++ fracPart).toNat?.map fun digits => | ||
      Float.ofScientific digits true fracPart.length | ||
  | _ => none | ||
|
||
|
||
#eval float_parsing "12.532" |
Oops, something went wrong.