Skip to content

Commit

Permalink
docd: add recovery middleware and allow reporting errors to GCP (#121)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathaningram authored Sep 20, 2022
1 parent e52737e commit 6d8c9df
Show file tree
Hide file tree
Showing 5 changed files with 761 additions and 66 deletions.
24 changes: 24 additions & 0 deletions docd/internal/error_reporter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package internal

import (
"io"

"cloud.google.com/go/errorreporting"
)

// ErrorReporter reports errors.
//
// It combines a Report method, which accepts a single errorreporting.Entry,
// with io.Closer so that callers can close the reporter when they are done
// with it.
type ErrorReporter interface {
// Report hands the entry to the reporter. It has no return value, so
// implementations cannot signal a reporting failure through this method.
Report(errorreporting.Entry)
// Closer adds Close() error to the interface.
io.Closer
}

// NopErrorReporter is an ErrorReporter that does nothing: every reported
// entry is discarded and closing it has no effect.
type NopErrorReporter struct{}

// Compile-time check that *NopErrorReporter satisfies ErrorReporter.
var _ ErrorReporter = (*NopErrorReporter)(nil)

// Report implements ErrorReporter by ignoring the entry.
func (*NopErrorReporter) Report(errorreporting.Entry) {}

// Close implements ErrorReporter. There is nothing to release, so it always
// returns nil.
func (*NopErrorReporter) Close() error {
	return nil
}
80 changes: 80 additions & 0 deletions docd/internal/recovery.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package internal

import (
"errors"
"fmt"
"log"
"net/http"
"runtime/debug"

"cloud.google.com/go/errorreporting"
)

// recoveryHandler is the http.Handler produced by the RecoveryHandler
// middleware. It delegates requests to handler and sends recovered panics to
// er.
type recoveryHandler struct {
// er receives an errorreporting.Entry for each recovered panic.
er ErrorReporter
// handler is the wrapped handler that actually serves the request.
handler http.Handler
}

// RecoveryHandler returns HTTP middleware that guards the wrapped handler
// against panics. When the wrapped handler panics while serving a request,
// the middleware recovers, writes a 500 response, reports the panic to er and
// logs it.
func RecoveryHandler(er ErrorReporter) func(h http.Handler) http.Handler {
	wrap := func(next http.Handler) http.Handler {
		return &recoveryHandler{
			er:      er,
			handler: next,
		}
	}
	return wrap
}

// ServeHTTP implements http.Handler. It serves req with the wrapped handler
// and, if that handler panics, recovers, responds with a JSON 500 body and
// passes the panic value together with its stack trace to handle for
// reporting and logging.
//
// A panic with http.ErrAbortHandler is not treated as a failure: it is
// re-raised untouched so that net/http can abort the response as the handler
// intended.
func (h recoveryHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	defer func() {
		rec := recover()
		if rec == nil {
			return
		}

		// http.ErrAbortHandler is the documented sentinel for deliberately
		// aborting a response; swallowing it would turn an intentional abort
		// into a reported 500.
		if rec == http.ErrAbortHandler {
			panic(rec)
		}

		// Capture the trace first so it is as close as possible to the
		// panic site.
		stack := debug.Stack()

		// The body below is JSON, so say so instead of relying on content
		// sniffing. If the wrapped handler already started the response this
		// (and the status code) is best effort only.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusInternalServerError)
		if _, err := w.Write([]byte(`{"error":"internal server error"}`)); err != nil {
			log.Printf("could not write panic response: %v", err)
		}

		h.handle(req, &recovered{rec, stack})
	}()

	h.handler.ServeHTTP(w, req)
}

// handle reports err, which occurred while serving r, to the configured
// ErrorReporter and then writes the error followed by its stack trace to the
// standard logger.
func (h recoveryHandler) handle(r *http.Request, err error) {
	// The boolean is ignored: when err carries no stack the trace is nil and
	// is reported and logged as such.
	trace, _ := stackFromRecovered(err)

	h.er.Report(errorreporting.Entry{
		Error: err,
		Req:   r,
		Stack: trace,
	})

	log.Println(err)
	log.Printf("%s", trace)
}

// recovered represents the return value from a call to recover, paired with
// the stack trace captured at recovery time. It implements error so that a
// panic can be passed through error-based APIs.
type recovered struct {
	// p is the value passed to the call of panic.
	p interface{}
	// stack is the panic stack trace.
	stack []byte
}

var _ error = (*recovered)(nil)

// Error implements error. If the panic value is itself an error its message
// is returned unchanged; any other value is formatted as "panic: <value>".
func (e *recovered) Error() string {
	if err, ok := e.p.(error); ok {
		return err.Error()
	}
	return fmt.Sprintf("panic: %v", e.p)
}

// Unwrap returns the panic value when it is an error, so that errors.Is and
// errors.As can inspect the original error. It returns nil for any other
// panic value.
func (e *recovered) Unwrap() error {
	err, _ := e.p.(error)
	return err
}

// stackFromRecovered extracts the stack trace from err when err is, or
// wraps, a *recovered created by this package. In that case it returns the
// trace and true.
//
// For any other error it returns nil and false.
func stackFromRecovered(err error) ([]byte, bool) {
	var rec *recovered
	if !errors.As(err, &rec) {
		return nil, false
	}
	return rec.stack, true
}
141 changes: 86 additions & 55 deletions docd/main.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,32 @@
package main

import (
"context"
"encoding/json"
"flag"
"fmt"
"log"
"net/http"

"cloud.google.com/go/errorreporting"

"github.com/gorilla/mux"

"code.sajari.com/docconv"
"code.sajari.com/docconv/docd/internal"
)

var (
inputPath = flag.String("input", "", "The file path to convert and exit; no server")
listenAddr = flag.String("addr", ":8888", "The address to listen on (e.g. 127.0.0.1:8888)")
logLevel = flag.Uint("log-level", 0, "The verbosity of the log")
listenAddr = flag.String("addr", ":8888", "The address to listen on (e.g. 127.0.0.1:8888)")

inputPath = flag.String("input", "", "The file path to convert and exit; no server")

errorReporting = flag.Bool("error-reporting", false, "Whether or not to enable GCP Error Reporting")
errorReportingGCPProjectID = flag.String("error-reporting-gcp-project-id", "", "The GCP project to use for Error Reporting")
errorReportingAppEngineService = flag.String("error-reporting-app-engine-service", "", "The App Engine service to use for Error Reporting")

logLevel = flag.Uint("log-level", 0, "The verbosity of the log")

readabilityLengthLow = flag.Int("readability-length-low", 70, "Sets the readability length low")
readabilityLengthHigh = flag.Int("readability-length-high", 200, "Sets the readability length high")
readabilityStopwordsLow = flag.Float64("readability-stopwords-low", 0.2, "Sets the readability stopwords low")
Expand All @@ -26,6 +39,20 @@ var (
func main() {
flag.Parse()

var er internal.ErrorReporter = &internal.NopErrorReporter{}
if *errorReporting {
var err error
er, err = errorreporting.NewClient(context.Background(), *errorReportingGCPProjectID, errorreporting.Config{
ServiceName: *errorReportingAppEngineService,
OnError: func(err error) {
log.Printf("Could not report error to Error Reporting service: %v", err)
},
})
if err != nil {
log.Fatalf("Could not create Error Reporting client: %v", err)
}
}

// TODO: Improve this (remove the need for it!)
docconv.HTMLReadabilityOptionsValues = docconv.HTMLReadabilityOptions{
LengthLow: *readabilityLengthLow,
Expand All @@ -45,73 +72,77 @@ func main() {
fmt.Print(string(resp.Body))
return
}
serve()
}

// Start the conversion web service
func serve() {
http.HandleFunc("/convert", func(w http.ResponseWriter, r *http.Request) {
// Readability flag. Currently only used for HTML
var readability bool
if r.FormValue("readability") == "1" {
readability = true
if *logLevel >= 2 {
log.Println("Readability is on")
}
}
serve(er)
}

path := r.FormValue("path")
if path != "" {
b, err := docconv.ConvertPathReadability(path, readability)
if err != nil {
// TODO: return a sensible status code for errors like this.
log.Printf("error converting path '%v': %v", path, err)
return
}
w.Write(b)
return
func convert(w http.ResponseWriter, r *http.Request) {
// Readability flag. Currently only used for HTML
var readability bool
if r.FormValue("readability") == "1" {
readability = true
if *logLevel >= 2 {
log.Println("Readability is on")
}
}

// Get uploaded file
file, info, err := r.FormFile("input")
path := r.FormValue("path")
if path != "" {
b, err := docconv.ConvertPathReadability(path, readability)
if err != nil {
log.Println("File upload", err)
// TODO: return a sensible status code for errors like this.
log.Printf("error converting path '%v': %v", path, err)
return
}
defer file.Close()
w.Write(b)
return
}

// Abort if file doesn't have a mime type
if len(info.Header["Content-Type"]) == 0 {
log.Println("No content type", info.Filename)
return
}
// Get uploaded file
file, info, err := r.FormFile("input")
if err != nil {
log.Println("File upload", err)
return
}
defer file.Close()

// If a generic mime type was provided then use file extension to determine mimetype
mimeType := info.Header["Content-Type"][0]
if mimeType == "application/octet-stream" {
mimeType = docconv.MimeTypeByExtension(info.Filename)
}
// Abort if file doesn't have a mime type
if len(info.Header["Content-Type"]) == 0 {
log.Println("No content type", info.Filename)
return
}

if *logLevel >= 1 {
log.Println("Received file: " + info.Filename + " (" + mimeType + ")")
}
// If a generic mime type was provided then use file extension to determine mimetype
mimeType := info.Header["Content-Type"][0]
if mimeType == "application/octet-stream" {
mimeType = docconv.MimeTypeByExtension(info.Filename)
}

data, err := docconv.Convert(file, mimeType, readability)
if err != nil {
log.Printf("error converting data: %v", err)
data = &docconv.Response{
Error: err.Error(),
}
}
if *logLevel >= 1 {
log.Println("Received file: " + info.Filename + " (" + mimeType + ")")
}

if err := json.NewEncoder(w).Encode(data); err != nil {
log.Printf("error marshaling JSON data: %v", err)
return
data, err := docconv.Convert(file, mimeType, readability)
if err != nil {
log.Printf("error converting data: %v", err)
data = &docconv.Response{
Error: err.Error(),
}
})
}

if err := json.NewEncoder(w).Encode(data); err != nil {
log.Printf("error marshaling JSON data: %v", err)
return
}
}

// serve starts the conversion web service on *listenAddr and blocks until the
// server stops, at which point it logs the server error and exits.
//
// Requests are routed through a mux router wrapped in the recovery
// middleware, so panics in handlers are reported to er.
func serve(er internal.ErrorReporter) {
	r := mux.NewRouter()
	r.HandleFunc("/convert", convert)

	// Start webserver
	log.Println("Setting log level to", *logLevel)
	log.Println("Starting docconv on", *listenAddr)

	// Serve through the router and recovery middleware only. A preceding
	// ListenAndServe call with a nil handler would block and leave this one
	// unreachable.
	err := http.ListenAndServe(*listenAddr, internal.RecoveryHandler(er)(r))

	// log.Fatal exits the process without running deferred calls, so close
	// the reporter explicitly first.
	if cerr := er.Close(); cerr != nil {
		log.Printf("Could not close error reporter: %v", cerr)
	}
	log.Fatal(err)
}
7 changes: 4 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ module code.sajari.com/docconv
go 1.14

require (
cloud.google.com/go/errorreporting v0.2.0
github.com/JalfResi/justext v0.0.0-20170829062021-c0282dea7198
github.com/PuerkitoBio/goquery v1.5.1 // indirect
github.com/advancedlogic/GoOse v0.0.0-20191112112754-e742535969c1
github.com/andybalholm/cascadia v1.2.0 // indirect
github.com/araddon/dateparse v0.0.0-20200409225146-d820a6159ab1 // indirect
github.com/go-resty/resty/v2 v2.3.0 // indirect
github.com/google/go-cmp v0.5.5
github.com/google/go-cmp v0.5.7
github.com/gorilla/mux v1.8.0
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 // indirect
github.com/levigross/exp-html v0.0.0-20120902181939-8df60c69a8f5 // indirect
github.com/mattn/go-runewidth v0.0.9 // indirect
Expand All @@ -18,7 +20,6 @@ require (
github.com/pkg/errors v0.9.1 // indirect
github.com/richardlehane/mscfb v1.0.3
github.com/richardlehane/msoleps v1.0.3
golang.org/x/net v0.0.0-20200602114024-627f9648deb9
golang.org/x/text v0.3.2 // indirect
golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420
google.golang.org/protobuf v1.28.0
)
Loading

0 comments on commit 6d8c9df

Please sign in to comment.