Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrations Logs E2E Rotation Flaky Fix (v2) #32939

Open
wants to merge 43 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
b17ac6c
Test integrations logs writes 10 at a time
soberpeach Nov 12, 2024
3bd2114
Switch greater than or equal to
soberpeach Nov 12, 2024
38efced
Rename files to comply with linter
soberpeach Nov 12, 2024
70acb8e
Rename check files to comply with linter
soberpeach Nov 12, 2024
3670985
Add a check for fake intake logs
soberpeach Nov 12, 2024
7529c56
Merge branch 'main' into lucas.liseth/integration-launcher-e2e-test
soberpeach Nov 12, 2024
d19caa8
Remove devmode
soberpeach Nov 13, 2024
86d1004
Check for logs tags
soberpeach Nov 13, 2024
fc36e97
Add max size check
soberpeach Nov 14, 2024
375ddf8
Remove testing for implementation
soberpeach Nov 15, 2024
14c1fca
Merge branch 'lucas.liseth/integration-launcher-e2e-test' into lucas.…
soberpeach Nov 19, 2024
8afa07f
Update maxsize test to check fakeintake
soberpeach Nov 19, 2024
ad92114
Treat fakeintake as a black box
soberpeach Nov 20, 2024
c949b19
Merge branch 'main' into lucas.liseth/integration-log-max-size-e2e-test
soberpeach Nov 20, 2024
758e3ae
Add log ID checking for log rotation test
soberpeach Nov 21, 2024
c17dec4
Merge branch 'main' into lucas.liseth/integration-log-max-size-e2e-test
soberpeach Nov 22, 2024
7430b2d
Prevent truncated logs by reducing log size
soberpeach Nov 25, 2024
f44e61e
Updated tests to use yaml template
soberpeach Dec 5, 2024
f4ef995
Merge branch 'main' into lucas.liseth/integration-log-rotation-e2e-test
soberpeach Dec 5, 2024
894174f
Remove comment from python check
soberpeach Dec 5, 2024
61ebd79
Remove flaky monotonic counter
soberpeach Dec 6, 2024
e907737
Combined python check files
soberpeach Dec 6, 2024
8a01fb5
Remove devmode
soberpeach Dec 6, 2024
1b26684
Renamed file
soberpeach Dec 6, 2024
536c064
Update fixtures file
soberpeach Dec 6, 2024
caba1fb
Updated bounds for logs check
soberpeach Dec 6, 2024
f9d74d6
Merge branch 'main' into lucas.liseth/integration-log-rotation-e2e-test
soberpeach Dec 10, 2024
ac80d1c
Replace UUID for logs with increasing counts
soberpeach Dec 12, 2024
e00b74f
Merge branch 'main' into lucas.liseth/integration-log-rotation-e2e-test
soberpeach Dec 12, 2024
ca4ae9b
Modify test to remove flakiness
soberpeach Dec 13, 2024
5ca8269
Remove print statement
soberpeach Dec 13, 2024
a1c9f7e
addressed comments
soberpeach Dec 18, 2024
39d0c08
Merge branch 'main' into lucas.liseth/integration-log-rotation-e2e-test
soberpeach Dec 23, 2024
2f7d392
Sort logs before checking for match
soberpeach Dec 26, 2024
9ef13f2
Accumulate logs before checking counter
soberpeach Dec 31, 2024
bd62bc2
Merge branch 'main' into lucas.liseth/integration-log-rotation-e2e-test
soberpeach Dec 31, 2024
b05c7f0
Disregard first number and only count subsequent
soberpeach Jan 1, 2025
2d770a4
Address PR comments
soberpeach Jan 2, 2025
0451297
Fix logs_test comparison
soberpeach Jan 2, 2025
a597abe
Update comment and log size
soberpeach Jan 2, 2025
02b64c2
Create unique service for each test run
soberpeach Jan 13, 2025
40c7c1f
Merge branch 'main' into lucas.liseth/integration-log-rotation-e2e-test
soberpeach Jan 14, 2025
2fa7b06
Remove devmode
soberpeach Jan 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from datadog_checks.base import AgentCheck
from datadog_checks.base.utils.time import get_timestamp


class HelloCheck(AgentCheck):
    """Test check that submits logs described by its instance configuration.

    Instance keys read on every run: ``log_message``, ``integration_tags``,
    ``unique_message``, ``log_count`` and, only when ``unique_message`` is
    truthy, ``log_size``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Increases by one on every run that has unique messages enabled.
        self.counter = 0

    def check(self, instance):
        """Build one log payload and send it ``log_count`` times."""
        message = instance['log_message']
        payload = {
            'timestamp': get_timestamp(),
            'ddtags': instance['integration_tags'],
        }

        if instance['unique_message']:
            # Repeat the base message to reach the requested size, then
            # prefix it with the run counter so each run's log is distinct.
            body = message * instance['log_size']
            self.counter += 1
            message = "counter: " + str(self.counter) + ' ' + body
        payload['message'] = message

        # The same payload object is submitted for every iteration.
        for _ in range(instance['log_count']):
            self.send_log(payload)

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,169 @@ package integrationslogs

import (
_ "embed"
"fmt"
"regexp"
"sort"
"strconv"
"strings"
"testing"
"time"

"github.com/DataDog/test-infra-definitions/components/datadog/agentparams"
"github.com/google/uuid"
"gopkg.in/yaml.v3"

"github.com/DataDog/datadog-agent/test/fakeintake/aggregator"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/e2e"
"github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments"
awshost "github.com/DataDog/datadog-agent/test/new-e2e/pkg/provisioners/aws/host"
"github.com/DataDog/datadog-agent/test/new-e2e/tests/agent-metrics-logs/log-agent/utils"
"github.com/DataDog/test-infra-definitions/components/datadog/agentparams"
"github.com/stretchr/testify/assert"
)

// IntegrationsLogsSuite is the e2e test suite for logs emitted by
// integrations. It embeds e2e.BaseSuite parameterized with environments.Host.
type IntegrationsLogsSuite struct {
e2e.BaseSuite[environments.Host]
}

//go:embed fixtures/tenLogs.py
var writeTenLogsCheck string
// customIntegration holds the contents of fixtures/integration.py, embedded
// at build time. The tests in this file install it as the Python check file
// on the agent host.
//
//go:embed fixtures/integration.py
var customIntegration string

// Config is the top-level layout of the check configuration file that
// generateYaml serializes. It has three sections: init_config, instances
// and logs.
type Config struct {
	// InitConfig is serialized under "init_config"; generateYaml leaves it nil.
	InitConfig any `yaml:"init_config"`
	// Instances lists the check instances.
	Instances []Instance `yaml:"instances"`
	// Logs lists the log collection entries attached to the check.
	Logs []LogsConfig `yaml:"logs"`
}

//go:embed fixtures/tenLogs.yaml
var writeTenLogsConfig string
// Instance describes one check instance as read by the custom Python check.
type Instance struct {
	// LogMessage is the base text of the log message.
	LogMessage string `yaml:"log_message"`
	// UniqueMessage, when true, makes the check repeat LogMessage LogSize
	// times and prefix it with an increasing "counter: N" marker.
	UniqueMessage bool `yaml:"unique_message"`
	// LogSize is the repetition factor applied to LogMessage when
	// UniqueMessage is true.
	LogSize int `yaml:"log_size"`
	// LogCount is how many times the log is sent on each check run.
	LogCount int `yaml:"log_count"`
	// IntegrationTags is the comma-separated tag string the check puts in
	// the log's ddtags field.
	IntegrationTags string `yaml:"integration_tags"`
}

// LogsConfig is one entry of the "logs" section of a check configuration.
type LogsConfig struct {
	// Type is the log source type; generateYaml always sets "integration".
	Type string `yaml:"type"`
	// Source is the value written to the entry's "source" key.
	Source string `yaml:"source"`
	// Service is the value written to the entry's "service" key; the tests
	// filter fake intake logs by it.
	Service string `yaml:"service"`
}

// TestLinuxFakeIntakeSuite
func TestIntegrationsLogsSuite(t *testing.T) {
suiteParams := []e2e.SuiteOption{
e2e.WithProvisioner(awshost.Provisioner(awshost.WithAgentOptions(
agentparams.WithLogs(),
// set the integration log file max size to 1MB
agentparams.WithAgentConfig("logs_config.integrations_logs_files_max_size: 1"),
agentparams.WithFile("/etc/datadog-agent/checks.d/writeTenLogs.py", writeTenLogsCheck, true),
agentparams.WithFile("/etc/datadog-agent/conf.d/writeTenLogs.yaml", writeTenLogsConfig, true))))}
agentparams.WithAgentConfig("logs_config.integrations_logs_files_max_size: 1"))))}

e2e.Run(t, &IntegrationsLogsSuite{}, suiteParams...)
}

// TestWriteTenLogsCheck ensures that a check writing ten logs per run gets at
// least ten of those logs delivered to the fake intake
func (v *IntegrationsLogsSuite) TestWriteTenLogsCheck() {
	tags := []string{"foo:bar", "env:dev"}
	yamlData, err := generateYaml("Custom log message", false, 1, 10, tags, "logs_from_integrations_source", "logs_from_integrations_service")
	if !assert.NoError(v.T(), err) {
		// Without a valid configuration there is nothing meaningful to deploy.
		return
	}

	v.UpdateEnv(awshost.Provisioner(awshost.WithAgentOptions(
		agentparams.WithLogs(),
		agentparams.WithFile("/etc/datadog-agent/conf.d/writeTenLogs.yaml", string(yamlData), true),
		agentparams.WithFile("/etc/datadog-agent/checks.d/writeTenLogs.py", customIntegration, true))))

	// The logs may not have reached the fake intake yet when UpdateEnv
	// returns, so poll instead of fetching a single time.
	assert.EventuallyWithT(v.T(), func(c *assert.CollectT) {
		logs, fetchErr := utils.FetchAndFilterLogs(v.Env().FakeIntake, "logs_from_integrations_service", "Custom log message")
		if assert.NoError(c, fetchErr) {
			assert.GreaterOrEqual(c, len(logs), 10)
		}
	}, 2*time.Minute, 5*time.Second)
}

// TestIntegrationLogFileRotation ensures logs are captured after a integration
// log file is rotated
func (v *IntegrationsLogsSuite) TestIntegrationLogFileRotation() {
// Since it's not yet possible to write to the integration log file by calling
// the agent check command, we can test if the file rotation works using the following method:

// 1. Set the max log file size to 1 MB and individual log size to a size
// large enough to cause rotations every 4 logs. 255 KB was chosen since the
// log is JSON formatted and includes information such as tags, service,
// source, and other information in the log.
// 2. Send five (or more) logs to the agent, causing the log file to rotate.
// 3. Check the logs to ensure that each is unique, ensuring the rotation worked correctly.

tags := []string{"test:rotate"}
serviceString := fmt.Sprintf("%s_%s", uuid.NewString(), time.Now().String())
yamlData, err := generateYaml("a", true, 1024*255, 1, tags, "rotation_source", serviceString)
assert.NoError(v.T(), err)

v.UpdateEnv(awshost.Provisioner(awshost.WithAgentOptions(
agentparams.WithLogs(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't look like the integrations_logs_files_max_size setting survives this updateEnv call, meaning that the log files are't actually getting rotated during this test.

agentparams.WithFile("/etc/datadog-agent/conf.d/rotation.yaml", string(yamlData), true),
agentparams.WithFile("/etc/datadog-agent/checks.d/rotation.py", customIntegration, true))))

// The log file should rotate every four logs, so checking through five
// iterations should guarantee a rotation. The counters in the logs should
// also contain the numbers 1 through 5

// Accumulate logs until there are at least 5
var receivedLogs []*aggregator.Log
assert.EventuallyWithT(v.T(), func(c *assert.CollectT) {
receivedLogs, err = utils.FetchAndFilterLogs(v.Env().FakeIntake, serviceString, ".*counter: \\d+.*")
assert.NoError(c, err)
assert.GreaterOrEqual(c, len(receivedLogs), 5)

}, 2*time.Minute, 5*time.Second)

// Check the logs to ensure they're unique and rotation worked correctly

// Sort the logs slice in ascending order according to timestamp. This
// guarantees that the last written log will be in last position in the
// slice. This is needed since FetchAndFilterLogs doesn't guarantee order
sort.Slice(receivedLogs, func(j, k int) bool {
return receivedLogs[j].Timestamp < receivedLogs[k].Timestamp
})

// Extract the count from the first log
compareCount := extractNumberFromLog(v.T(), receivedLogs[0])

// Ensure numbers from subsequent logs are increasing
for i := 1; i < 5; i++ {
compareCount++
currentLogCount := extractNumberFromLog(v.T(), receivedLogs[i])
assert.Equal(v.T(), compareCount, currentLogCount)
}
}

// counterPattern captures the digits that follow "counter: " in a log
// message. It is compiled once at package scope instead of on every call.
var counterPattern = regexp.MustCompile(`counter: (\d+)`)

// extractNumberFromLog extracts the counter number from the log message.
//
// The test is stopped immediately when the message holds no
// "counter: <digits>" sequence or when the digits cannot be parsed as an int,
// so callers never receive a meaningless value.
func extractNumberFromLog(t *testing.T, log *aggregator.Log) int {
	t.Helper()

	matches := counterPattern.FindStringSubmatch(log.Message)
	if len(matches) < 2 {
		// Indexing matches[1] without this guard would panic with an index
		// out of range instead of reporting a readable failure.
		t.Fatal("Did not find matching \"count\" regular expression in log")
	}

	count, err := strconv.Atoi(matches[1])
	if err != nil {
		t.Fatalf("counter value %q is not a valid integer: %v", matches[1], err)
	}
	return count
}

// generateYaml Generates a YAML config for checks to use
func generateYaml(logMessage string, uniqueMessage bool, logSize int, logCount int, integrationTags []string, logSource string, logService string) ([]byte, error) {
// Define the YAML structure
config := Config{
InitConfig: nil,
Instances: []Instance{
{
LogMessage: logMessage,
UniqueMessage: uniqueMessage,
LogSize: logSize,
LogCount: logCount,
IntegrationTags: strings.Join(integrationTags, ","),
},
},
Logs: []LogsConfig{
{
Type: "integration",
Source: logSource,
Service: logService,
},
},
}

return yaml.Marshal(&config)
}
Loading