Skip to content

Commit

Permalink
Work on #463.
Browse files Browse the repository at this point in the history
  • Loading branch information
mjordan committed Apr 12, 2018
1 parent 53901a3 commit 0371cae
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 33 deletions.
9 changes: 9 additions & 0 deletions src/metadataparsers/csv/DcToCsv.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,20 @@ public function __construct($settings)
// Call Metadata.php constructor
parent::__construct($settings);

if (isset($this->settings['WRITER']['metadata_only'])) {
$this->metadata_only = $this->settings['WRITER']['metadata_only'];
} else {
$this->metadata_only = false;
}

// The CSV writer that writes out object metadata is instantiated in the writer.
$headings = $this->settings['METADATA_PARSER']['dc_elements'];
array_unshift($headings, $this->settings['METADATA_PARSER']['record_key']);
$output_file_path = $this->settings['WRITER']['output_file'];
$output_csv_writer = Writer::createFromPath($output_file_path, 'a');
if (!$this->metadata_only) {
array_push($headings, 'File');
}
$output_csv_writer->insertOne($headings);
}

Expand Down
73 changes: 40 additions & 33 deletions src/writers/OaipmhCsv.php
Original file line number Diff line number Diff line change
Expand Up @@ -75,48 +75,55 @@ public function writePackages($metadata, $pages, $record_id)
$output_path = $this->outputDirectory . DIRECTORY_SEPARATOR;

$normalized_record_id = $this->normalizeFilename($record_id);
$this->writeMetadataFile($metadata);

if ($this->metadata_only) {
return;
if (!$this->metadata_only) {
// Retrieve the file associated with the document and write it to the output
// folder using the filename or record_id identifier
$source_file_url = $this->fileGetter->getFilePath($record_id);
// Retrieve the PDF, etc. using Guzzle.
if ($source_file_url) {
$client = new Client();
$response = $client->get(
$source_file_url,
['stream' => true,
'timeout' => $this->httpTimeout,
'connect_timeout' => $this->httpTimeout,
'verify' => $this->verifyCA]
);

// Lazy MimeType => extension mapping: use the last part of the MimeType.
$content_types = $response->getHeader('Content-Type');
list($type, $extension) = explode('/', $content_types[0]);
$extension = preg_replace('/;.*$/', '', $extension);

$content_file_path = $output_path . $normalized_record_id . '.' . $extension;

$body = $response->getBody();
while (!$body->eof()) {
file_put_contents($content_file_path, $body->read(2048), FILE_APPEND);
}
} else {
$this->log->addWarning(
"No content file found in OAI-PMH record",
array('record' => $record_id)
);
}
}

// Retrieve the file associated with the document and write it to the output
// folder using the filename or record_id identifier
$source_file_url = $this->fileGetter->getFilePath($record_id);
// Retrieve the PDF, etc. using Guzzle.
if ($source_file_url) {
$client = new Client();
$response = $client->get(
$source_file_url,
['stream' => true,
'timeout' => $this->httpTimeout,
'connect_timeout' => $this->httpTimeout,
'verify' => $this->verifyCA]
);

// Lazy MimeType => extension mapping: use the last part of the MimeType.
$content_types = $response->getHeader('Content-Type');
list($type, $extension) = explode('/', $content_types[0]);
$extension = preg_replace('/;.*$/', '', $extension);

$content_file_path = $output_path . $normalized_record_id . '.' . $extension;

$body = $response->getBody();
while (!$body->eof()) {
file_put_contents($content_file_path, $body->read(2048), FILE_APPEND);
}
} else {
$this->log->addWarning(
"No content file found in OAI-PMH record",
array('record' => $record_id)
);

if (!$this->metadata_only) {
array_push($metadata, $normalized_record_id . '.' . $extension);
}
$this->writeMetadataFile($metadata);
}

/**
* Adds a row to the CSV file (unlike other Writers' writeMetadataFile(),
* which writes out an entire metadata XML file).
*
* @param array $metadata
* An array of values to add to the CSV file, matching the order of
* the column headings.
*/
public function writeMetadataFile($metadata, $output_file_path = '')
{
Expand Down

0 comments on commit 0371cae

Please sign in to comment.