From e5ece87764bc9bd449e8fbf039ff08b091b7b93a Mon Sep 17 00:00:00 2001
From: Szymon Mikitiuk <singlet.bunches_0m@icloud.com>
Date: Thu, 1 Feb 2024 20:21:59 +0100
Subject: [PATCH 1/4] Add a complete solution for the link exercise using
 html.Tokenizer

---
 link/main.go      | 97 +++++++++++++++++++++++++++++++++++++----------
 link/main_test.go | 22 -----------
 2 files changed, 76 insertions(+), 43 deletions(-)

diff --git a/link/main.go b/link/main.go
index 00e3ef7..f65c033 100644
--- a/link/main.go
+++ b/link/main.go
@@ -2,8 +2,12 @@ package main
 
 import (
 	"bytes"
+	"flag"
 	"fmt"
+	"io"
+	"log"
 	"os"
+	"strings"
 
 	"golang.org/x/net/html"
 )
@@ -13,40 +17,91 @@ type Link struct {
 	Text string
 }
 
-func catch[T any](val T, err error) T {
-	if err != nil {
-		fmt.Println(err)
-		os.Exit(1)
-	}
-	return val
+func (l Link) String() string {
+	return fmt.Sprintf("{href='%s', text='%s'}", l.Href, l.Text)
 }
 
 func main() {
+	inputFile := readFile(*parseUserInput())
+	defer inputFile.Close()
+	links := parseLinks(*inputFile)
+	log.Println(links)
+}
+
+func parseUserInput() *string {
+	htmlFilePath := flag.String("file", "", "Path to the HTML file")
+	flag.Parse()
+
+	if *htmlFilePath == "" {
+		flag.Usage()
+		log.Fatalln("Error: HTML file path is required.")
+	}
+
+	return htmlFilePath
+}
+
+func readFile(path string) *os.File {
+	file, err := os.Open(path)
+	if err != nil {
+		log.Fatalf("Error reading HTML file: '%s': %v", path, err)
+	}
 
-	content := catch(os.ReadFile("ex3.html"))
+	return file
+}
 
-	reader := bytes.NewReader(content)
-	tokenizer := html.NewTokenizer(reader)
+func parseLinks(file os.File) []Link {
+	tokenizer := html.NewTokenizer(&file)
+	var links []Link
+	var buffer bytes.Buffer
+	var catchText bool
+	var link Link
 
-	// var links []string
 	for {
-		t := tokenizer.Next()
-		if t == html.ErrorToken {
-			//fmt.Println("Error token")
-			// fmt.Println(t)
+		tokenType := tokenizer.Next()
+		err := processErrorToken(tokenizer, tokenType)
+		if err != nil {
 			break
 		}
 
-		// fmt.Println(z.Token().Attr)
-		token := tokenizer.Token()
-		if token.Data == "a" && len(token.Attr) > 0 {
-			for _, attr := range token.Attr {
-				if attr.Key == "href" {
-					fmt.Println(attr.Val)
+		switch tokenType {
+		case html.StartTagToken:
+			token := tokenizer.Token()
+			if token.DataAtom.String() == "a" && len(token.Attr) > 0 {
+				for _, attr := range token.Attr {
+					if attr.Key == "href" {
+						link.Href = attr.Val
+						catchText = true
+					}
 				}
 			}
-		}
 
+		case html.TextToken:
+			if catchText {
+				buffer.Write(tokenizer.Raw())
+			}
+
+		case html.EndTagToken:
+			token := tokenizer.Token()
+			if token.DataAtom.String() == "a" {
+				link.Text = strings.TrimSpace(buffer.String())
+				links = append(links, link)
+				buffer.Reset()
+				catchText = false
+			}
+		}
 	}
 
+	return links
 }
+
+func processErrorToken(tokenizer *html.Tokenizer, tokenType html.TokenType) error {
+	if tokenType == html.ErrorToken {
+		err := tokenizer.Err()
+		if err != io.EOF {
+			log.Fatalln("Error when parsing HTML", err)
+		}
+		return err
+	}
+	return nil
+}
+
diff --git a/link/main_test.go b/link/main_test.go
index 25079d7..70d5ab8 100644
--- a/link/main_test.go
+++ b/link/main_test.go
@@ -1,7 +1,6 @@
 package main
 
 import (
-	"reflect"
 	"testing"
 )
 
@@ -17,24 +16,3 @@ func Test_main(t *testing.T) {
 		})
 	}
 }
-
-func Test_catch(t *testing.T) {
-	type args struct {
-		val []byte
-		err error
-	}
-	tests := []struct {
-		name string
-		args args
-		want []byte
-	}{
-		// TODO: Add test cases.
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if got := catch(tt.args.val, tt.args.err); !reflect.DeepEqual(got, tt.want) {
-				t.Errorf("catch() = %v, want %v", got, tt.want)
-			}
-		})
-	}
-}

From 707a4bb85502d11735add002b628ca66b0fe6157 Mon Sep 17 00:00:00 2001
From: Szymon Mikitiuk <singlet.bunches_0m@icloud.com>
Date: Thu, 1 Feb 2024 20:37:45 +0100
Subject: [PATCH 2/4] Add go.work.sum file

Some explanation courtesy of ChatGPT: https://chat.openai.com/share/ac97becd-7552-4b8f-ba3b-80fdea5a6849
---
 go.work.sum | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 go.work.sum

diff --git a/go.work.sum b/go.work.sum
new file mode 100644
index 0000000..c3d903c
--- /dev/null
+++ b/go.work.sum
@@ -0,0 +1,2 @@
+golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=

From fbb91d654acf46df0fb6951f0c52d1eb28f971e8 Mon Sep 17 00:00:00 2001
From: Py Explorer <pyexplorer@hs3.pl>
Date: Thu, 1 Feb 2024 20:58:05 +0100
Subject: [PATCH 3/4] session-10: replace switch/case with an if/else-if chain

---
 link/main.go | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/link/main.go b/link/main.go
index f65c033..7d57ae7 100644
--- a/link/main.go
+++ b/link/main.go
@@ -63,8 +63,7 @@ func parseLinks(file os.File) []Link {
 			break
 		}
 
-		switch tokenType {
-		case html.StartTagToken:
+		if tokenType == html.StartTagToken {
 			token := tokenizer.Token()
 			if token.DataAtom.String() == "a" && len(token.Attr) > 0 {
 				for _, attr := range token.Attr {
@@ -74,13 +73,11 @@ func parseLinks(file os.File) []Link {
 					}
 				}
 			}
-
-		case html.TextToken:
+		} else if tokenType == html.TextToken {
 			if catchText {
 				buffer.Write(tokenizer.Raw())
 			}
-
-		case html.EndTagToken:
+		} else if tokenType == html.EndTagToken {
 			token := tokenizer.Token()
 			if token.DataAtom.String() == "a" {
 				link.Text = strings.TrimSpace(buffer.String())
@@ -104,4 +101,3 @@ func processErrorToken(tokenizer *html.Tokenizer, tokenType html.TokenType) erro
 	}
 	return nil
 }
-

From 70f9d43e2eb0b124981dacce0d3fffde40abfabd Mon Sep 17 00:00:00 2001
From: Py Explorer <pyexplorer@hs3.pl>
Date: Thu, 8 Feb 2024 19:31:16 +0100
Subject: [PATCH 4/4] session-10: add comments, default -file flag to ex2.html

---
 link/main.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/link/main.go b/link/main.go
index 7d57ae7..f2b96d4 100644
--- a/link/main.go
+++ b/link/main.go
@@ -29,7 +29,7 @@ func main() {
 }
 
 func parseUserInput() *string {
-	htmlFilePath := flag.String("file", "", "Path to the HTML file")
+	htmlFilePath := flag.String("file", "ex2.html", "Path to the HTML file")
 	flag.Parse()
 
 	if *htmlFilePath == "" {
@@ -53,6 +53,8 @@ func parseLinks(file os.File) []Link {
 	tokenizer := html.NewTokenizer(&file)
 	var links []Link
 	var buffer bytes.Buffer
+	// The link text could also be accumulated in a plain string instead of bytes.Buffer:
+	// var text string
 	var catchText bool
 	var link Link
 
@@ -76,6 +78,7 @@ func parseLinks(file os.File) []Link {
 		} else if tokenType == html.TextToken {
 			if catchText {
 				buffer.Write(tokenizer.Raw())
+				// text += string(tokenizer.Raw())
 			}
 		} else if tokenType == html.EndTagToken {
 			token := tokenizer.Token()
@@ -83,6 +86,7 @@ func parseLinks(file os.File) []Link {
 				link.Text = strings.TrimSpace(buffer.String())
 				links = append(links, link)
 				buffer.Reset()
+				// text = ""
 				catchText = false
 			}
 		}