Building a Go Structured Logger | Crit Russell

August 10, 2019

Building a Go Structured Logger

Why structured logging?

Basically: to use your logs to get aggregate data. If you can consolidate them (for example, with a logging server/service), then you should be able to pull interesting information about the health of your systems and possibly even useful data for your business and/or product.

Why another log package?

It’s an example of what you can do with Go’s standard packages. I’ve used something similar in the past to help with combining libraries that have features I want in an easy-to-use internal API.

Code

This was designed from looking at the implementation of the log package in the Go standard library.

Lets start with the core systems for this new internal log package:

// project/internal/log/log.go
package log

// imports... (only using the standard library)

// Loggable is any structure that can be passed directly to the logger instance. 
//
// logger.With(user, field, device) 
// vs 
// logger.With(log.Data{"user": user.id, "field": field.value, "device": device.id})
type Loggable interface {
  Log() map[string]interface{}
}

// Data creates a shortcut to the Loggable interface that can be used
// by the client by reducing the amount of noise in the log call:
//
// logger.With(map[string]interface{"key": "someValue"})
// vs 
// logger.With(log.Data{"key": "someValue"})
type Data map[string]interface{}

// Log satisfies the Loggable interface
func (d Data) Log() map[string]interface{} {
  return d
}

// New creates a new Log instance with a specific name and writer. The writer 
// being io.Writer is the key to making this system extensible.
func New(appName string, out io.Writer) *Log {
  return &Log{app: appName, Out: out}
}

// Log is the core structure that holds all methods used by clients. The `Out` 
// property is exported so that we can reuse the io.Writer in other places
// which has been required to make some middleware useful.
type Log struct {
  app  string
  data map[string]interface{}
  mu   sync.Mutex
  Out  io.Writer
}

// WriteLog is the structure of the log used when writing data out. This could be 
// customized to anything needed by a logging service. This also assumes JSON 
// formatting (which seems to be popular with logging services) but could be any format.
type WriteLog struct {
  Time  time.Time `json:"time"`
  App   string    `json:"app"`
  Level string    `json:"level"`
  Msg   string    `json:"msg"`
  Data  Data      `json:"data,omitempty"`
  Src   src       `json:"src"`
}

// src is part of WriteLog that details log call location in the client. This is 
// super useful for detecting problems in live code.
type src struct {
  File string `json:"file"`
  Line int    `json:"line"`
}

// AppName retrieves the log instance's current app name. Useful in middleware.
func (l *Log) AppName() string {
  return l.app
}

// With saves specific data to be written out when Info, Error, or Warn is called.
func (l *Log) With(data ...Loggable) Log {
  // set will allow us to detect when a key has already been created with a value
  // and change the value to a slice of values if the key is presented again for logging.
  // logger := log.New(...).With(log.Data{"key": "v1"}) => {... "key":"v1" ...}
  // logger.With(log.Data{"key": "v2"})                 => {... "key":["v1","v2"] ...}
  set := make(map[string]interface{})
  
  // Right now this implementation is memory hungry. There is much room for improvement.
  for key, value := range l.data {
    set[key] = value
  }
  
  for _, node := range data {
    for key, value := node.Log() {
      // do we have a current key already?
      if current, ok := set[key]; ok {
        // is the current value already a slice?
        if s, ok := current.([]interface{}); ok {
          // append to old slice
          s = append(s, value)
          set[key] = s
          continue
        }
        
        // create a new slice
        set[key] = []interface{}{current, value}
        continue
      }
      
      // create the new entry
      set[key] = value
    }
  }
  
  return Log{
    app: l.app,
    data: Data(set),
    Out: l.Out,
  }
}

// Warn sends the log data to the writer with appropriate level.
func (l Log) Warn(msg string) {
  l.output(2, "warn", msg)
}

// Error sends the log data to the writer with appropriate level.
func (l Log) Error(errorOrMsg interface{}) {
  l.output(2, "error", errorParse(errorOrMsg))
}

// Info sends the log data to the writer with appropriate level.
func (l Log) Info(msg string) {
  l.output(2, "info", msg)
}

// output creates the structure log and sends it to the writer.
func (l *Log) output(callDepth int, level, msg string) {
  var out WriteLog
  var ok bool
  
  // do some "expensive" processing here like getting UTC and
  // caller info (which really _is_ expensive)
  
  out.Time = time.Now().UTC()
  
  _, out.Src.File, out.Src.Line, ok = runtime.Caller(callDepth)
  
  if !ok {
    out.Src.File = "???"
    out.Src.Line = 0
  } else {
    out.Src.File = srcFileParse(out.Src.File)
  }
  
  out.Level = level
  out.Msg = msg
  out.Data = map[string]interface{}
  
   // lock to protect from mutations while copying data from logger to log entry
  l.mu.Lock()
  
  // avoiding `defer l.mu.Unlock()` so we can unlock as soon as possible to allow other
  // channels to continue their processes since there is no reason to lock for the 
  // JSON marshalling and writing.
  
  out.App = l.app
  
  for key, value := range l.data {
    out.Data[key] = value
  }
  
  // clear existing data to prevent repeated writes
  l.data = make(map[string]interface{})
  
  l.mu.Unlock() // unlock for more "expensive" processing
  
  data, err := json.Marshal(out)
  
  if err != nil {
    // This should "never" happen, but "never" is only a matter of time in distributed 
    // systems so lets handle it.
    // I highly encourage making this message something that your logging service
    // can easily detect and attaching a mission critical alert to it.
    data = []byte("Logger unable to marshal log output to JSON: " + err.Error())
  }
  
  l.Out.Write(data)
}

// errorParse handles transforming the message into an appropriate string.
func errorParse(errorOrMsg interface{}) string {
  if msg, ok := errorOrMsg.(string); ok {
    return msg
  }
  
  if err, ok := errorOrMsg.(error); ok {
    return err.Error()
  }
  
  return fmt.Sprintf("%v", errorOrMsg)
}

// srcFileParse returns either the filename and extension, or the last directory
// (which is also usually the package name in Go) with the filename and extension.
// "project/src/model/user.go" => "model/user.go"
// "main.go" => "main.go
func srcFileParse(filename string) string {
  // "project/src/model/user.go" => "project/src/model", "user.go"
  dir, file := filepath.Split(filename)
  
  // "project/src/model" => ["project", "src", "model"]
  parts := strings.FieldsFunc(dir, func(c rune) bool {
    return c == filepath.Separator
  })
  
  if len(parts) > 0 {
    // => "model/user.go"
    return filepath.Join(parts[len(parts)-1], file)
  }
  
  // "user.go"
  return file
}

Tests for this implementation would look something like this:

// project/internal/log/log_test.go
package log

// imports... (using stretchr/testify/assert library)

func TestInfo(t *testing.T) {
  var out bytes.Buffer
  
  New("info-test", &out).Info("hello")
  
  expected := `"app":"info-test","level":"info","msg":"hello","src":{"file":"log/log_test.go"`
  
  assert.True(t, strings.Contains(out.String(), expected), out.String())
}

// func TestWarning(t *testing.T) ...
// func TestErrorString(t *testing.T) ...
// func TestError(t *testing.T) ...

func TestWith(t *testing.T) {
	var out bytes.Buffer

	l := New("with-test", &out)

	l.With(Data{"country": "us", "limit": 10}).Info("hello")

	expected := `"app":"with-test","level":"info","msg":"hello","data":{"country":"us","limit":10}`

	assert.True(t, strings.Contains(out.String(), expected), out.String())
}

type model struct {
	ID   int    `json:"id"`
	Name string `json:"name"`
}

func (m model) Log() map[string]interface{} {
	return Data{"model": m}
}

func TestWithStruct(t *testing.T) {
	var out bytes.Buffer
	var a model

	l := New("with-struct-test", &out)

	a.ID = 1
	a.Name = "Example"

	l.With(a).Info("hello")

	needle := `"data":{"model":{"id":1,"name":"Example"}}`

	assert.Equal(t, 1, strings.Count(out.String(), needle), out.String())
}

// func TestWithStructCollision(t *testing.T) ...
// func BenchmarkLoggingCollisions(b *testing.B) ...

Alright, lets create a writer for standard out (just reusing the fmt package):

// project/internal/log/std_writer.go
package log

// imports...

// Writer satisfies io.Write interface
type Writer struct {}

func (Writer) Write(p []byte) (int, error) {
  return fmt.Println(string(p))
}

Allows us to do this in client code:

// project/cmd/product/main.go
package main

import (
  "github.com/org/project/internal/log"  
)

func main() {
  logger := log.New("product", log.Writer{})
  
  logger.With(log.Data{"hello": "world"}).Info("example")
  // running the program after building (or possibly from `go run main.go`) will yield 
  // this in the cli:
  // {..., "app":"product","level":"info","msg":"example","data":{"hello":"world"}, ...}
}

Now lets do a custom writer. In this case its an http writer that posts logs to a remote server/service:

// project/internal/log/post_writer.go
package log

// imports...

// transport with reasonable timeouts for log transfer over the network
var transport = &http.Transport{
	Dial: (&net.Dialer{
		Timeout: 1 * time.Second,
	}).Dial,
	TLSHandshakeTimeout: 1 * time.Second,
}

// client with a reasonable timeout and our transport
var client = &http.Client{
	Timeout:   time.Second * 3,
	Transport: transport,
}

// NewPostWriter gives us a new Writer that we can pass to a logger instance
func NewPostWriter(targetURL string, writer io.Writer) io.Writer {
  if _, err := url.ParseRequestURI(target); err != nil {
    log.Printf("NewPostWriter targetURL '%s' invalid: %s\n", targetURL, err.Error())
    targetURL = ""
  }

  return &PostWriter{target: targetURL, writer: writer}
}

// PostWriter is the standard data structure for handling HTTP POST requests for a 
// logging instance
type PostWriter struct {
  target    string
  targetMux sync.Mutex
  writer    io.Writer
}

// SetURL lets a client change the target server address for logging after 
// writer initialization.
func (w *PostWriter) SetURL(target string) error {
  _, err := url.ParseRequestURI(target)
  
  if err != nil {
    return fmt.Errorf("write target url invalid: %v", err)
  }
  
  w.targetMux.Lock()
  defer w.targetMux.Unlock()

  w.target = target

  return nil
}

// Write satisfies the io.Writer interface
func (w *PostWriter) Write(p []byte) (int, error) {
  if w.target != "" {
    // We have a target address, lets send an HTTP Post with the payload to that server.
    // This is done with a new Go channel and any errors will be dumped to standard out.
    // It's possible we could make this more robust with a backoff, retry, 
    // circuit breaker, and a fixed buffer queue.
    // This works for now.
    go func(target string, p []byte) {
      res, err := client.Post(target, "application/json", bytes.NewReader(p))
      
      if err != nil {
        log.Printf("internal/log PostWriter error: %s", err.Error())
        return
      }
      
      defer res.Body.Close()
      
      if res.StatusCode >= 300 {
        log.Printf("internal/log PostWriter error: %s", res.Status)
      }
    }(w.target, p)
  }
  
  // while that HTTP Post is happening in the background, lets immediately write 
  // out to whatever writer the client has as backup. Most likely log.Writer{}.
  return w.writer.Write(p)
}

And now the client can do this:

// project/cmd/product/main.go
package main

import (
  "github.com/org/project/internal/log"  
)

func main() {
  logger := log.New("product", log.NewPostWriter("https://example/injest", log.Writer{}))
  
  logger.With(log.Data{"hello": "world"}).Info("example")
  // running the program after building (or possibly from `go run main.go`) will yield 
  // this in the cli:
  // {..., "app":"product","level":"info","msg":"example","data":{"hello":"world"}, ...}
  // and this log value will be posted to the remote server (if the network allows).
}

You can also create middleware (this one for Echo):

// project/internal/log/echo.go
package log

// imports...

// ForEcho makes the middleware needed to send echo logs to the Log Writer.
// see https://github.com/labstack/gommon/blob/master/log/log.go for 
// format string reference
func ForEcho(logger Log) echo.MiddlewareFunc {
  fname := "???"
  lnum := 0
  
  _, file, line, ok := runtime.Caller(1)
  
  if ok {
    fname = srcFileParse(file)
    lnum = line
  }
  
  // format needed by LoggerWithConfig; notice that we are adding data to this string
  // using fmt.Sprintf. Namely app, src.file and src.line entries. This does have a
  // problem that all logs are going to say the source is the same file/line. But since
  // these logs are usually from the framework itself, its fine for now.
  f := `{"time":"${time_rfc3339}","app":"%s","level":"info","msg":"${method}",` +
    `"data":{"remote":"${remote_ip}","uri":"${uri}","status":${status},` +
    `"latency":"${latency_human}"},"src":{"file":"%s","line":%d}}`

  return middleware.LoggerWithConfig(middleware.LoggerConfig{
    Format: fmt.Sprintf(f, logger.AppName(), fname, lnum),
    Output: logger.Out,
  })
}

© Crit Russell