Skip to content

Commit

Permalink
Fix CML (#1450)
Browse files Browse the repository at this point in the history
* Fix CML

* fixup! Fix CML

* fixup! Fix CML
  • Loading branch information
0x2b3bfa0 authored May 10, 2024
1 parent 6638d38 commit 24ca0df
Show file tree
Hide file tree
Showing 22 changed files with 160 additions and 164 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/test-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ jobs:
TEST_GITLAB_REPO: https://gitlab.com/iterative.ai/cml_qa_tests_dummy
TEST_GITLAB_SHA: f8b8b49a253243830ef59a7f090eb887157b2b67
TEST_GITLAB_ISSUE: 1
TEST_BBCLOUD_TOKEN: ${{ secrets.TEST_BBCLOUD_TOKEN }}
TEST_BBCLOUD_REPO: https://bitbucket.org/iterative-ai/cml-qa-tests-dummy
TEST_BBCLOUD_SHA: b511535a89f76d3d311b1c15e3e712b15c0b94e3
TEST_BBCLOUD_ISSUE: 1
TEST_BITBUCKET_TOKEN: ${{ secrets.TEST_BITBUCKET_TOKEN }}
TEST_BITBUCKET_REPO: https://bitbucket.org/iterative-ai/cml-qa-tests-dummy
TEST_BITBUCKET_SHA: b511535a89f76d3d311b1c15e3e712b15c0b94e3
TEST_BITBUCKET_ISSUE: 1
test-os:
needs: authorize
name: test-${{ matrix.system }}
Expand Down
28 changes: 2 additions & 26 deletions bin/cml.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const { pseudoexec } = require('pseudoexec');

const kebabcaseKeys = require('kebabcase-keys');
const which = require('which');
const winston = require('winston');
const { logger, setupLogger } = require('../src/logger');
const yargs = require('yargs');

const CML = require('../src/cml').default;
Expand Down Expand Up @@ -72,30 +72,6 @@ const setupOpts = (opts) => {
opts.cml = new CML(opts);
};

const setupLogger = (opts) => {
const { log: level } = opts;

winston.configure({
format: process.stdout.isTTY
? winston.format.combine(
winston.format.colorize({ all: true }),
winston.format.simple()
)
: winston.format.combine(
winston.format.errors({ stack: true }),
winston.format.json()
),
transports: [
new winston.transports.Console({
stderrLevels: Object.keys(winston.config.npm.levels),
handleExceptions: true,
handleRejections: true,
level
})
]
});
};

const setupTelemetry = async (opts, yargs) => {
const { cml, _: command } = opts;

Expand Down Expand Up @@ -201,7 +177,7 @@ const handleError = (message, error) => {
const event = { ...telemetryEvent, error: err.message };
await send({ event });
}
winston.error(err);
logger.error(err);
process.exit(1);
}
})();
1 change: 0 additions & 1 deletion bin/cml.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ describe('command-line interface tests', () => {
cml.js comment Manage comments
cml.js pr <glob path...> Manage pull requests
cml.js runner Manage self-hosted (cloud & on-premise) CI runners
cml.js tensorboard Manage tensorboard.dev connections
cml.js workflow Manage CI workflows
cml.js ci Prepare Git repository for CML operations
Expand Down
4 changes: 2 additions & 2 deletions bin/cml/asset/publish.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
const fs = require('fs').promises;
const kebabcaseKeys = require('kebabcase-keys');
const winston = require('winston');
const { logger } = require('../../../src/logger');

const { CML } = require('../../../src/cml');

Expand All @@ -12,7 +12,7 @@ exports.description = `${DESCRIPTION}\n${DOCSURL}`;

exports.handler = async (opts) => {
if (opts.gitlabUploads) {
winston.warn(
logger.warn(
'--gitlab-uploads will be deprecated soon, use --native instead'
);
opts.native = true;
Expand Down
14 changes: 0 additions & 14 deletions bin/cml/comment/create.e2e.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,4 @@ describe('Comment integration tests', () => {
path
);
});

test('cml send-comment to current repo', async () => {
const report = `## Test Comment`;

await fs.writeFile(path, report);
await exec('node', './bin/cml.js', 'send-comment', path);
});

test('cml send-comment --publish to current repo', async () => {
const report = `## Test Comment\n![](assets/logo.png)`;

await fs.writeFile(path, report);
await exec('node', './bin/cml.js', 'send-comment', '--publish', path);
});
});
58 changes: 28 additions & 30 deletions bin/cml/runner/launch.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const fs = require('fs').promises;
const net = require('net');
const kebabcaseKeys = require('kebabcase-keys');
const timestring = require('timestring');
const winston = require('winston');
const { logger } = require('../../../src/logger');

const { exec, randid, sleep } = require('../../../src/utils');
const tf = require('../../../src/terraform');
Expand All @@ -29,26 +29,26 @@ const shutdown = async (opts) => {
if (!RUNNER) return true;

try {
winston.info(`Unregistering runner ${name}...`);
logger.info(`Unregistering runner ${name}...`);
await cml.unregisterRunner({ name });
} catch (err) {
if (err.message.includes('is still running a job')) {
winston.warn(`\tCancelling shutdown: ${err.message}`);
logger.warn(`\tCancelling shutdown: ${err.message}`);
return false;
}

winston.error(`\tFailed: ${err.message}`);
logger.error(`\tFailed: ${err.message}`);
}

RUNNER.kill('SIGINT');
winston.info('\tSuccess');
logger.info('\tSuccess');
return true;
};

const retryWorkflows = async () => {
try {
if (!noRetry && RUNNER_JOBS_RUNNING.length > 0) {
winston.info(`Still pending jobs, retrying workflow...`);
logger.info(`Still pending jobs, retrying workflow...`);

await Promise.all(
RUNNER_JOBS_RUNNING.map(
Expand All @@ -58,14 +58,14 @@ const shutdown = async (opts) => {
);
}
} catch (err) {
winston.error(err);
logger.error(err);
}
};

const destroyLeo = async () => {
if (!tfResource) return;

winston.info(`Waiting ${destroyDelay} seconds to destroy`);
logger.info(`Waiting ${destroyDelay} seconds to destroy`);
await sleep(destroyDelay);

const { cloud, id, region } = JSON.parse(
Expand All @@ -83,7 +83,7 @@ const shutdown = async (opts) => {
id
);
} catch (err) {
winston.error(`\tFailed destroying with LEO: ${err.message}`);
logger.error(`\tFailed destroying with LEO: ${err.message}`);
}
};

Expand All @@ -97,21 +97,21 @@ const shutdown = async (opts) => {
clearInterval(watcher);
await retryWorkflows();
} catch (err) {
winston.error(`Error connecting the SCM: ${err.message}`);
logger.error(`Error connecting the SCM: ${err.message}`);
}
}

await destroyLeo();

if (error) throw error;

winston.info('runner status', { reason, status: 'terminated' });
logger.info('runner status', { reason, status: 'terminated' });
process.exit(0);
};

const runCloud = async (opts) => {
const runTerraform = async (opts) => {
winston.info('Terraform apply...');
logger.info('Terraform apply...');

const { token, repo, driver } = cml;
const {
Expand Down Expand Up @@ -143,7 +143,7 @@ const runCloud = async (opts) => {
await tf.checkMinVersion();

if (gpu === 'tesla')
winston.warn(
logger.warn(
'GPU model "tesla" has been deprecated; please use "v100" instead.'
);

Expand Down Expand Up @@ -189,7 +189,7 @@ const runCloud = async (opts) => {
return tfstate;
};

winston.info('Deploying cloud runner plan...');
logger.info('Deploying cloud runner plan...');
const tfstate = await runTerraform(opts);
const { resources } = tfstate;
for (const resource of resources) {
Expand Down Expand Up @@ -221,14 +221,14 @@ const runCloud = async (opts) => {
timeouts: attributes.timeouts,
kubernetesNodeSelector: attributes.kubernetes_node_selector
};
winston.info(JSON.stringify(nonSensitiveValues));
logger.info(JSON.stringify(nonSensitiveValues));
}
}
}
};

const runLocal = async (opts) => {
winston.info(`Launching ${cml.driver} runner`);
logger.info(`Launching ${cml.driver} runner`);
const {
workdir,
name,
Expand Down Expand Up @@ -265,10 +265,10 @@ const runLocal = async (opts) => {
if (process.platform === 'linux') {
const acpiSock = net.connect('/var/run/acpid.socket');
acpiSock.on('connect', () => {
winston.info('Connected to acpid service.');
logger.info('Connected to acpid service.');
});
acpiSock.on('error', (err) => {
winston.warn(
logger.warn(
`Error connecting to ACPI socket: ${err.message}. The acpid.service helps with instance termination detection.`
);
});
Expand All @@ -283,10 +283,10 @@ const runLocal = async (opts) => {
const dataHandler =
({ cloudSpot }) =>
async (data) => {
winston.debug(data.toString());
logger.debug(data.toString());
const logs = await cml.parseRunnerLog({ data, name, cloudSpot });
for (const log of logs) {
winston.info('runner status', log);
logger.info('runner status', log);

if (log.status === 'job_started') {
const { job: id, pipeline, date } = log;
Expand Down Expand Up @@ -380,7 +380,7 @@ const run = async (opts) => {
throw new Error(
`Runner name ${name} is already in use. Please change the name or terminate the existing runner.`
);
winston.info(`Reusing existing runner named ${name}...`);
logger.info(`Reusing existing runner named ${name}...`);
return;
}

Expand All @@ -390,9 +390,7 @@ const run = async (opts) => {
(runner) => runner.online
)
) {
winston.info(
`Reusing existing online runners with the ${labels} labels...`
);
logger.info(`Reusing existing online runners with the ${labels} labels...`);
return;
}

Expand All @@ -402,33 +400,33 @@ const run = async (opts) => {
'cml runner flag --reuse-idle is unsupported by bitbucket'
);
}
winston.info(
logger.info(
`Checking for existing idle runner matching labels: ${labels}.`
);
const currentRunners = await cml.runnersByLabels({ labels, runners });
const availableRunner = currentRunners.find(
(runner) => runner.online && !runner.busy
);
if (availableRunner) {
winston.info('Found matching idle runner.', availableRunner);
logger.info('Found matching idle runner.', availableRunner);
return;
}
}

if (dockerVolumes.length && cml.driver !== 'gitlab')
winston.warn('Parameters --docker-volumes is only supported in gitlab');
logger.warn('Parameters --docker-volumes is only supported in gitlab');

if (cml.driver === 'github')
winston.warn(
logger.warn(
'Github Actions timeout has been updated from 72h to 35 days. Update your workflow accordingly to be able to restart it automatically.'
);

if (RUNNER_NAME)
winston.warn(
logger.warn(
'ignoring RUNNER_NAME environment variable, use CML_RUNNER_NAME or --name instead'
);

winston.info(`Preparing workdir ${workdir}...`);
logger.info(`Preparing workdir ${workdir}...`);
await fs.mkdir(workdir, { recursive: true });
await fs.chmod(workdir, '766');

Expand Down
2 changes: 1 addition & 1 deletion bin/cml/tensorboard.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
exports.command = 'tensorboard';
exports.description = 'Manage tensorboard.dev connections';
exports.description = false;
exports.builder = (yargs) =>
yargs
.options({
Expand Down
8 changes: 4 additions & 4 deletions bin/cml/tensorboard/connect.e2e.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ const rmTbDevExperiment = async (tbOutput) => {
};

describe('tbLink', () => {
test('timeout without result throws exception', async () => {
test.skip('timeout without result throws exception', async () => {
const stdout = tempy.file({ extension: 'log' });
const stderror = tempy.file({ extension: 'log' });
const message = 'there is an error';
Expand All @@ -37,7 +37,7 @@ describe('tbLink', () => {
expect(error.message).toBe(`Tensorboard took too long`);
});

test('valid url is returned', async () => {
test.skip('valid url is returned', async () => {
const stdout = tempy.file({ extension: 'log' });
const stderror = tempy.file({ extension: 'log' });
const message = 'https://iterative.ai';
Expand All @@ -51,7 +51,7 @@ describe('tbLink', () => {
});

describe('CML e2e', () => {
test('cml tensorboard-dev --md returns md and after command TB is still up', async () => {
test.skip('cml tensorboard-dev --md returns md and after command TB is still up', async () => {
const name = 'My experiment';
const desc = 'Test experiment';
const title = 'go to the experiment';
Expand Down Expand Up @@ -80,7 +80,7 @@ describe('CML e2e', () => {
expect(output.includes('cml=tb')).toBe(true);
});

test('cml tensorboard-dev invalid creds', async () => {
test.skip('cml tensorboard-dev invalid creds', async () => {
try {
await exec(
'node',
Expand Down
Loading

0 comments on commit 24ca0df

Please sign in to comment.