Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Coradoc integration #92

Merged
merged 4 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
source "https://rubygems.org"

git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }

# Specify your gem's dependencies in reverse_adoc.gemspec
gemspec
14 changes: 7 additions & 7 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
require 'bundler/gem_tasks'
require "bundler/gem_tasks"

if File.exist?('.codeclimate')
ENV["CODECLIMATE_REPO_TOKEN"] = File.read('.codeclimate').strip
if File.exist?(".codeclimate")
ENV["CODECLIMATE_REPO_TOKEN"] = File.read(".codeclimate").strip
end

require 'rspec/core/rake_task'
require "rspec/core/rake_task"
RSpec::Core::RakeTask.new(:spec)
task :default => :spec
task default: :spec

desc 'Open an irb session preloaded with this library'
desc "Open an irb session preloaded with this library"
task :console do
sh 'irb -rubygems -I lib -r reverse_adoc.rb'
sh "irb -rubygems -I lib -r reverse_adoc.rb"
end
22 changes: 11 additions & 11 deletions exe/reverse_adoc
Original file line number Diff line number Diff line change
@@ -1,33 +1,34 @@
#!/usr/bin/env ruby
# Usage: reverse_adoc [FILE]...
# Usage: cat FILE | reverse_adoc
require 'rubygems'
require 'bundler/setup'
require "rubygems"
require "bundler/setup"

require 'reverse_adoc'
require 'optparse'
require 'fileutils'
require "reverse_adoc"
require "optparse"
require "fileutils"

OptionParser.new do |opts|
opts.banner = "Usage: reverse_adoc [options] <file>"
opts.on('-m', '--mathml2asciimath', 'Convert MathML to AsciiMath') do |v|
opts.on("-m", "--mathml2asciimath", "Convert MathML to AsciiMath") do |_v|
ReverseAdoc.config.mathml2asciimath = true
end

opts.on('-oFILENAME', '--output=FILENAME', 'Output file to write to') do |v|
opts.on("-oFILENAME", "--output=FILENAME", "Output file to write to") do |v|
ReverseAdoc.config.destination = File.expand_path(v)
# puts "output goes to #{ReverseAdoc.config.destination}"
end

opts.on('-e', '--external-images', 'Export images if data URI') do |v|
opts.on("-e", "--external-images", "Export images if data URI") do |_v|
ReverseAdoc.config.external_images = true
end

opts.on('-u', '--unknown_tags [pass_through, drop, bypass, raise]', 'Unknown tag handling (default: pass_through)') do |v|
opts.on("-u", "--unknown_tags [pass_through, drop, bypass, raise]",
"Unknown tag handling (default: pass_through)") do |v|
ReverseAdoc.config.unknown_tags = v
end

opts.on('-v', '--version', 'Version information') do |v|
opts.on("-v", "--version", "Version information") do |_v|
puts "reverse_adoc: v#{ReverseAdoc::VERSION}"
exit
end
Expand All @@ -36,7 +37,6 @@ OptionParser.new do |opts|
puts opts
exit
end

end.parse!

if filename = ARGV.pop
Expand Down
31 changes: 15 additions & 16 deletions exe/w2a
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'rubygems'
require 'bundler/setup'
require "rubygems"
require "bundler/setup"

require 'word-to-markdown'
require 'optparse'
require 'reverse_adoc'
require "word-to-markdown"
require "optparse"
require "reverse_adoc"

ARGV.push('-h') if ARGV.empty?
ARGV.push("-h") if ARGV.empty?

OptionParser.new do |opts|
opts.banner = "Usage: w2a [options] <file>"
opts.on('-m', '--mathml2asciimath', 'Convert MathML to AsciiMath') do |v|
opts.on("-m", "--mathml2asciimath", "Convert MathML to AsciiMath") do |_v|
ReverseAdoc.config.mathml2asciimath = true
end

opts.on('-oFILENAME', '--output=FILENAME', 'Output file to write to') do |v|
opts.on("-oFILENAME", "--output=FILENAME", "Output file to write to") do |v|
ReverseAdoc.config.destination = File.expand_path(v)
# puts "output goes to #{ReverseAdoc.config.destination}"
end

opts.on('-e', '--external-images', 'Export images if data URI') do |v|
opts.on("-e", "--external-images", "Export images if data URI") do |_v|
ReverseAdoc.config.external_images = true
end

opts.on('-v', '--version', 'Version information') do |v|
opts.on("-v", "--version", "Version information") do |_v|
puts "reverse_adoc: v#{ReverseAdoc::VERSION}"
puts "[dependency] WordToMarkdown: v#{WordToMarkdown::VERSION}"
unless Gem.win_platform?
puts "[dependency] LibreOffice: v#{WordToMarkdown.soffice.version}"
else
if Gem.win_platform?
puts "[dependency] LibreOffice: version not available on Windows"
else
puts "[dependency] LibreOffice: v#{WordToMarkdown.soffice.version}"
end
exit
end
Expand All @@ -40,7 +40,6 @@ OptionParser.new do |opts|
puts opts
exit
end

end.parse!

filename = ARGV.pop
Expand All @@ -53,10 +52,10 @@ end
ReverseAdoc.config.sourcedir = Dir.mktmpdir

doc = WordToMarkdown.new(filename, ReverseAdoc.config.sourcedir)
#File.open("test.html", "w:UTF-8") { |f| f.write doc.document.html }
# File.open("test.html", "w:UTF-8") { |f| f.write doc.document.html }
adoc_content = ReverseAdoc.convert(
ReverseAdoc.cleaner.preprocess_word_html(doc.document.html),
WordToMarkdown::REVERSE_MARKDOWN_OPTIONS
WordToMarkdown::REVERSE_MARKDOWN_OPTIONS,
)
# puts scrub_whitespace(doc.document.html)

Expand Down
18 changes: 9 additions & 9 deletions lib/reverse_adoc.rb
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# frozen_string_literal: true

require 'digest'
require 'nokogiri'
require 'reverse_adoc/version'
require 'reverse_adoc/errors'
require 'reverse_adoc/cleaner'
require 'reverse_adoc/config'
require 'reverse_adoc/converters'
require 'reverse_adoc/converters/base'
require "reverse_adoc/html_converter"
require "digest"
require "nokogiri"
require_relative "reverse_adoc/version"
require_relative "reverse_adoc/errors"
require_relative "reverse_adoc/cleaner"
require_relative "reverse_adoc/config"
require_relative "reverse_adoc/converters"
require_relative "reverse_adoc/converters/base"
require_relative "reverse_adoc/html_converter"

module ReverseAdoc
def self.convert(input, options = {})
Expand Down
35 changes: 17 additions & 18 deletions lib/reverse_adoc/cleaner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,21 @@ def remove_inner_whitespaces(string)
# in the border area.
# Same for underscores and brackets.
def clean_tag_borders(string)
result = string.gsub(/\s?\*{2,}.*?\*{2,}\s?/) do |match|
preserve_border_whitespaces(match, default_border: ReverseAdoc.config.tag_border) do
match.strip.sub("** ", "**").sub(" **", "**")
end
end
# result = string.gsub(/\s?\*{2,}.*?\*{2,}\s?/) do |match|
# preserve_border_whitespaces(match, default_border: ReverseAdoc.config.tag_border) do
# match.strip.sub("** ", "**").sub(" **", "**")
# end
# end

result = result.gsub(/\s?_{2,}.*?_{2,}\s?/) do |match|
preserve_border_whitespaces(match, default_border: ReverseAdoc.config.tag_border) do
match.strip.sub("__ ", "__").sub(" __", "__")
end
end
# result = string.gsub(/\s?_{2,}.*?_{2,}\s?/) do |match|
# preserve_border_whitespaces(match, default_border: ReverseAdoc.config.tag_border) do
# match.strip.sub("__ ", "__").sub(" __", "__")
# end
# end

result = result.gsub(/\s?~{2,}.*?~{2,}\s?/) do |match|
preserve_border_whitespaces(match, default_border: ReverseAdoc.config.tag_border) do
result = string.gsub(/\s?~{2,}.*?~{2,}\s?/) do |match|
preserve_border_whitespaces(match,
default_border: ReverseAdoc.config.tag_border) do
match.strip.sub("~~ ", "~~").sub(" ~~", "~~")
end
end
Expand All @@ -60,7 +61,7 @@ def clean_tag_borders(string)
end

def clean_punctuation_characters(string)
string.gsub(/(\*\*|~~|__)\s([.!?'"])/, "\\1".strip + "\\2")
string.gsub(/(\*\*|~~|__)\s([.!?'"])/, "#{'\\1'.strip}\\2")
end

# preprocesses HTML, rather than postprocessing it
Expand Down Expand Up @@ -104,11 +105,9 @@ def preserve_border_whitespaces(string, options = {})
end

def present_or_default(string, default)
if string.nil? || string.empty?
default
else
string
end
return default if string.nil? || string.empty?

string
end
end
end
14 changes: 7 additions & 7 deletions lib/reverse_adoc/config.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
require 'tmpdir'
require "tmpdir"

module ReverseAdoc
class Config
attr_accessor :unknown_tags, :tag_border, :mathml2asciimath, :external_images,
:destination, :sourcedir, :image_counter, :image_counter_pattern, :input_format
:destination, :sourcedir, :image_counter, :image_counter_pattern, :input_format

def initialize
@unknown_tags = :pass_through
Expand All @@ -18,14 +18,14 @@ def initialize
# @sourcedir = nil

# Image counter, assuming there are max 999 images
@image_counter = 1
@image_counter = 1
# pad with 0s
@image_counter_pattern = '%03d'
@image_counter_pattern = "%03d"

@em_delimiter = '_'.freeze
@strong_delimiter = '*'.freeze
@em_delimiter = "_".freeze
@strong_delimiter = "*".freeze
@inline_options = {}
@tag_border = ' '.freeze
@tag_border = " ".freeze
end

def with(options = {})
Expand Down
5 changes: 2 additions & 3 deletions lib/reverse_adoc/converters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ def self.lookup(tag_name)
@@converters[tag_name.to_sym] or default_converter(tag_name)
end

private

def self.default_converter(tag_name)
case ReverseAdoc.config.unknown_tags.to_sym
when :pass_through
Expand All @@ -26,7 +24,8 @@ def self.default_converter(tag_name)
when :raise
raise UnknownTagError, "unknown tag: #{tag_name}"
else
raise InvalidConfigurationError, "unknown value #{ReverseAdoc.config.unknown_tags.inspect} for ReverseAdoc.config.unknown_tags"
raise InvalidConfigurationError,
"unknown value #{ReverseAdoc.config.unknown_tags.inspect} for ReverseAdoc.config.unknown_tags"
end
end
end
Expand Down
48 changes: 24 additions & 24 deletions lib/reverse_adoc/converters/a.rb
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
require "uri"
require "coradoc"

module ReverseAdoc
module Converters
class A < Base
def convert(node, state = {})
def to_coradoc(node, state = {})
name = treat_children(node, state)
href = node['href']

href = node["href"]
title = extract_title(node)
id = node['id'] || node['name']
id = node["id"] || node["name"]

id = id&.gsub(/\s/, "")&.gsub(/__+/, "_")

if /^_Toc\d+$|^_GoBack$/.match id
""
elsif !id.nil? && !id.empty?
"[[#{id}]]"
elsif href.to_s.start_with?('#')
return "" if /^_Toc\d+$|^_GoBack$/.match?(id)

if !id.nil? && !id.empty?
return Coradoc::Element::Inline::Anchor.new(id)
end

if href.to_s.start_with?("#")
href = href.sub(/^#/, "").gsub(/\s/, "").gsub(/__+/, "_")
if name.empty?
"<<#{href}>>"
else
"<<#{href},#{name}>>"
end
elsif href.to_s.empty?
name
else
name = title if name.empty?
href = "link:#{href}" unless href.to_s =~ URI::DEFAULT_PARSER.make_regexp
link = "#{href}[#{name}]"
link.prepend(' ')
link
return Coradoc::Element::Inline::CrossReference.new(href, name)
end
end

private
if href.to_s.empty?
return name
end

Coradoc::Element::Inline::Link.new(path: href,
name: name,
title: title)
end

def convert(node, state = {})
Coradoc::Generator.gen_adoc(to_coradoc(node, state))
end
end

register :a, A.new
Expand Down
10 changes: 6 additions & 4 deletions lib/reverse_adoc/converters/aside.rb
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
module ReverseAdoc
module Converters
class Aside < Base
def to_coradoc(node, state = {})
content = treat_children(node, state)
Coradoc::Element::Block::Side.new(lines: content.lines)
end

def convert(node, state = {})
id = node['id']
anchor = id ? "[[#{id}]]\n" : ""
content = treat_children(node, state).strip
"\n\n****\n" << treat_children(node, state) << "\n****\n\n"
Coradoc::Generator.gen_adoc(to_coradoc(node, state))
end
end

Expand Down
Loading
Loading