Skip to content

Commit

Permalink
option to load PST into memory before processing
Browse files Browse the repository at this point in the history
  • Loading branch information
epfromer committed May 25, 2018
1 parent b7c3283 commit 6a1c1b2
Show file tree
Hide file tree
Showing 3 changed files with 325 additions and 9 deletions.
11 changes: 11 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@
"console": "integratedTerminal",
"internalConsoleOptions": "openOnSessionStart"
},
{
"name": "test-in-mem.ts",
"type": "node",
"request": "launch",
"args": ["src/test-in-mem.ts"],
"runtimeArgs": ["-r", "ts-node/register"],
"cwd": "${workspaceRoot}",
"protocol": "inspector",
"console": "integratedTerminal",
"internalConsoleOptions": "openOnSessionStart"
},
{
"name": "test-min.ts",
"type": "node",
Expand Down
50 changes: 41 additions & 9 deletions src/PSTFile/PSTFile.class.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ export class PSTFile {
return this._pstFileType;
}

// Path of the PST in the filesystem. Initialised to '' because the constructor
// only assigns it when the PST is opened from a file name.
private _pstFilename: string = '';
public get pstFilename(): string {
  return this._pstFilename;
}
Expand All @@ -112,6 +112,9 @@ export class PSTFile {
// file descriptor (the constructor sets it to -1 when the PST is an in-memory buffer)
private pstFD: number;

// in-memory file buffer (instead of filesystem)
// Buffer.alloc replaces the deprecated `new Buffer(size)` constructor.
private pstBuffer: Buffer = Buffer.alloc(0);

// position in file
private position: number = 0;

Expand All @@ -120,14 +123,22 @@ export class PSTFile {
* @param {string} fileName
* @memberof PSTFile
*/
public constructor(fileName: string) {
this._pstFilename = fileName;
public constructor(pstBuffer: Buffer);
public constructor(fileName: string);
public constructor(arg: any) {
if (arg instanceof Buffer) {
// use an in-memory buffer of PST
this.pstBuffer = arg;
this.pstFD = -1;
} else {
// use PST in filesystem
this._pstFilename = arg;
this.pstFD = fs.openSync(this._pstFilename, 'r');
}

// attempt to open file
// confirm first 4 bytes are !BDN
this.pstFD = fs.openSync(this._pstFilename, 'r');
let buffer = new Buffer(514);
fs.readSync(this.pstFD, buffer, 0, 514, 0);
this.readSync(buffer, 514, 0);
let key = '!BDN';
if (buffer[0] != key.charCodeAt(0) || buffer[1] != key.charCodeAt(1) || buffer[2] != key.charCodeAt(2) || buffer[3] != key.charCodeAt(3)) {
throw new Error('PSTFile::open Invalid file header (expected: "!BDN"): ' + buffer);
Expand Down Expand Up @@ -161,7 +172,9 @@ export class PSTFile {
* @memberof PSTFile
*/
public close() {
  // The constructor stores -1 as the descriptor when the PST was supplied as
  // an in-memory buffer, in which case there is nothing to close. Any
  // non-negative value (including 0) is a real descriptor, and it is closed
  // exactly once.
  if (this.pstFD >= 0) {
    fs.closeSync(this.pstFD);
  }
}

/**
Expand Down Expand Up @@ -796,7 +809,7 @@ export class PSTFile {
public read(position?: number): number {
  // An explicit position of 0 is a valid offset, so test for "not supplied"
  // rather than truthiness; otherwise read(0) would read from the cursor.
  const pos = position !== undefined ? position : this.position;
  // Buffer.alloc is zero-filled, so a read that returns no bytes yields 0.
  const buffer = Buffer.alloc(1);
  const bytesRead = this.readSync(buffer, buffer.length, pos);
  // leave the cursor just past the bytes that were actually read
  this.position = pos + bytesRead;
  return buffer[0];
}
Expand All @@ -810,10 +823,29 @@ export class PSTFile {
*/
public readCompletely(buffer: Buffer, position?: number) {
  // An explicit position of 0 is a valid offset, so test for "not supplied"
  // rather than truthiness; otherwise position 0 would be ignored.
  const pos = position !== undefined ? position : this.position;
  const bytesRead = this.readSync(buffer, buffer.length, pos);
  // leave the cursor just past the bytes that were actually read
  this.position = pos + bytesRead;
}

/**
 * Read bytes from the PST, which is either an open file or an in-memory buffer.
 * @param {Buffer} buffer destination; filled starting at offset 0
 * @param {number} length maximum number of bytes to read
 * @param {number} position offset within the PST to start reading from
 * @returns {number} of bytes actually read (may be fewer than length near the end)
 * @memberof PSTFile
 */
private readSync(buffer: Buffer, length: number, position: number): number {
  // The constructor stores -1 as the descriptor for an in-memory PST; any
  // non-negative value is a real file descriptor.
  if (this.pstFD >= 0) {
    // read from file system
    return fs.readSync(this.pstFD, buffer, 0, length, position);
  }

  // Reading at or past the end yields 0 bytes, like a file read at end of
  // file, instead of handing an out-of-range start offset to Buffer.copy.
  if (position >= this.pstBuffer.length) {
    return 0;
  }

  // copy from in-memory buffer; copy() reports how many bytes it really
  // copied, which can be fewer than requested near the end of the PST
  return this.pstBuffer.copy(buffer, 0, position, position + length);
}

/**
* Seek to a specific position in PST file.
* @param {long} index
Expand Down
273 changes: 273 additions & 0 deletions src/test-in-mem.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
/**
* Copyright 2010-2018 Richard Johnson, Orin Eman & Ed Pfromer
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* ---
*
* This file is part of pst-extractor.
*
* pst-extractor is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* pst-extractor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with pst-extractor. If not, see <http://www.gnu.org/licenses/>.
*/
import { PSTMessage } from './PSTMessage/PSTMessage.class';
import { PSTFile } from './PSTFile/PSTFile.class';
import { PSTFolder } from './PSTFolder/PSTFolder.class';
import { Log } from './Log.class';
import { PSTAttachment } from './PSTAttachment/PSTAttachment.class';
import * as fs from 'fs';
import { PSTRecipient } from './PSTRecipient/PSTRecipient.class';

// where the PST files are read from, and where extracted content is written
const pstFolder = '/media/sf_Outlook/test/';
const topOutputFolder = '/media/sf_Outlook/pst-extractor/';
let outputFolder = '';

// switches controlling what is saved and printed
const saveToFS = false;
const displaySender = true;
const displayRecipients = true;
const displayBody = false;
const verbose = true;

// state shared by the functions in this file: depth starts at -1 so that the
// root folder is at depth 0 once processFolder has incremented it; col counts
// the dots printed on the current line
let depth = -1;
let col = 0;

// make a top level folder to hold content
if (saveToFS) {
  try {
    fs.mkdirSync(topOutputFolder);
  } catch (err) {
    Log.error(err);
  }
}

let directoryListing = fs.readdirSync(pstFolder);
for (const filename of directoryListing) {
  const pstPath = pstFolder + filename;
  console.log(pstPath);

  // time for performance comparison to Java and improvement
  const start = Date.now();

  // load file into memory buffer, then open as PSTFile
  let pstFile = new PSTFile(fs.readFileSync(pstPath));

  // make a sub folder for each PST
  if (saveToFS) {
    outputFolder = topOutputFolder + filename + '/';
    try {
      fs.mkdirSync(outputFolder);
    } catch (err) {
      Log.error(err);
    }
  }

  console.log(pstFile.getMessageStore().displayName);
  processFolder(pstFile.getRootFolder());

  const elapsed = Date.now() - start;
  console.log('processed in ' + elapsed + ' ms');
}

/**
 * Recursively print a folder, its subfolders and its emails, saving each
 * email to the filesystem when saveToFS is set.
 * @param {PSTFolder} folder
 */
function processFolder(folder: PSTFolder) {
  depth += 1;

  // the root folder (depth 0) doesn't have a display name
  if (depth > 0) {
    console.log(getDepth(depth) + folder.displayName);
  }

  // subfolders first...
  if (folder.hasSubfolders) {
    const subFolders: PSTFolder[] = folder.getSubFolders();
    subFolders.forEach(subFolder => {
      processFolder(subFolder);
    });
  }

  // ...then the emails held directly in this folder
  if (folder.contentCount > 0) {
    depth += 1;
    for (let email: PSTMessage = folder.getNextChild(); email != null; email = folder.getNextChild()) {
      if (verbose) {
        console.log(getDepth(depth) + 'Email: ' + email.descriptorNodeId + ' - ' + email.subject);
      } else {
        printDot();
      }

      const sender = getSender(email);
      const recipients = getRecipients(email);

      // display body?
      if (verbose && displayBody) {
        console.log(email.body);
        console.log(email.bodyRTF);
      }

      // save content to fs?
      if (saveToFS) {
        // date string in format YYYY-MM-DD, preferring the client submit
        // time and falling back to the creation time; '' when neither is set
        const when = email.clientSubmitTime || email.creationTime;
        let strDate = '';
        if (when) {
          const month = ('0' + (when.getMonth() + 1)).slice(-2);
          const day = ('0' + when.getDate()).slice(-2);
          strDate = [when.getFullYear(), month, day].join('-');
        }

        // one folder per day
        const emailFolder = outputFolder + strDate + '/';
        if (!fs.existsSync(emailFolder)) {
          try {
            fs.mkdirSync(emailFolder);
          } catch (err) {
            Log.error(err);
          }
        }

        doSaveToFS(email, emailFolder, sender, recipients);
      }
    }
    depth -= 1;
  }
  depth -= 1;
}

/**
 * Save an email as a text file, and each of its attachments as a separate
 * file, in the given folder. Errors are logged and do not stop processing.
 * @param {PSTMessage} msg
 * @param {string} emailFolder folder path, ending with a path separator
 * @param {string} sender
 * @param {string} recipients
 */
function doSaveToFS(msg: PSTMessage, emailFolder: string, sender: string, recipients: string) {
  try {
    // save the msg as a txt file
    const filename = emailFolder + msg.descriptorNodeId + '.txt';
    if (verbose) {
      console.log('saving msg to ' + filename);
    }
    const fd = fs.openSync(filename, 'w');
    try {
      fs.writeSync(fd, msg.clientSubmitTime + '\r\n');
      fs.writeSync(fd, 'Type: ' + msg.messageClass + '\r\n');
      fs.writeSync(fd, 'From: ' + sender + '\r\n');
      fs.writeSync(fd, 'To: ' + recipients + '\r\n');
      // NOTE(review): no line break is written between the subject and the
      // body -- confirm this is intended.
      fs.writeSync(fd, 'Subject: ' + msg.subject);
      fs.writeSync(fd, msg.body);
    } finally {
      // always release the descriptor, even when a write fails
      fs.closeSync(fd);
    }
  } catch (err) {
    Log.error(err);
  }

  // walk list of attachments and save to fs
  for (let i = 0; i < msg.numberOfAttachments; i++) {
    const attachment: PSTAttachment = msg.getAttachment(i);
    // NOTE(review): the check is on `filename` but the path is built from
    // `longFilename` -- confirm both are always populated together.
    if (attachment.filename) {
      const filename = emailFolder + msg.descriptorNodeId + '-' + attachment.longFilename;
      if (verbose) {
        console.log('saving attachment to ' + filename);
      }
      try {
        const fd = fs.openSync(filename, 'w');
        try {
          const attachmentStream = attachment.fileInputStream;
          if (attachmentStream) {
            const bufferSize = 8176;
            const buffer = Buffer.alloc(bufferSize);
            let bytesRead;
            // a read that does not fill the buffer is taken as the last chunk
            do {
              bytesRead = attachmentStream.read(buffer);
              fs.writeSync(fd, buffer, 0, bytesRead);
            } while (bytesRead == bufferSize);
          }
        } finally {
          // close even when there is no stream or a read/write fails
          fs.closeSync(fd);
        }
      } catch (err) {
        Log.error(err);
      }
    }
  }
}

/**
 * Build the sender string for an email: the sender name, followed by the
 * email address in parentheses when the two differ. When verbose and
 * displaySender are set, also print it for 'IPM.Note' messages.
 * @param {PSTMessage} email
 * @returns {string}
 */
function getSender(email: PSTMessage): string {
  const name = email.senderName;
  const sender = name === email.senderEmailAddress ? name : name + ' (' + email.senderEmailAddress + ')';

  const shouldPrint = verbose && displaySender && email.messageClass === 'IPM.Note';
  if (shouldPrint) {
    console.log(getDepth(depth) + ' sender: ' + sender);
  }

  return sender;
}

/**
 * Return the recipients of an email as a single string.
 * @param {PSTMessage} email
 * @returns {string}
 */
function getRecipients(email: PSTMessage): string {
  // the display-to string is used as is, instead of walking the recipients table
  const { displayTo } = email;
  return displayTo;
}

/**
 * Write one dot to stdout for a message, starting a new line (and resetting
 * the column counter) once the counter has gone past 100.
 */
function printDot() {
  process.stdout.write('.');

  // compare against the count from before this dot, then advance it
  const dotsBefore = col;
  col = dotsBefore + 1;
  if (dotsBefore > 100) {
    console.log('');
    col = 0;
  }
}

/**
 * Returns a string with a visual indication of depth in the tree.
 * If dots have been printed on the current line, the string starts with a
 * newline and the column counter is reset.
 * @param {number} depth
 * @returns {string}
 */
function getDepth(depth: number): string {
  let lead = '';
  if (col > 0) {
    col = 0;
    lead = '\n';
  }

  // one bar per ancestor level, then the branch marker
  const bars: string[] = [];
  for (let level = 1; level < depth; level++) {
    bars.push(' | ');
  }

  return lead + bars.join('') + ' |- ';
}

0 comments on commit 6a1c1b2

Please sign in to comment.