Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated fix for #1652: autodetect line breaks type in editor #49

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/editor/edit-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ gboolean edit_save_block (WEdit * edit, const char *filename, off_t start, off_t
gboolean edit_save_block_cmd (WEdit * edit);
gboolean edit_insert_file_cmd (WEdit * edit);

off_t edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath);
off_t edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath, LineBreaks lb_type);
gboolean edit_load_back_cmd (WEdit * edit);
gboolean edit_load_forward_cmd (WEdit * edit);
void edit_block_process_cmd (WEdit * edit, int macro_number);
Expand Down
106 changes: 101 additions & 5 deletions src/editor/edit.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ int option_line_state = 0;
int option_line_state_width = 0;
gboolean option_cursor_after_inserted_block = FALSE;
int option_state_full_filename = 0;
int option_autodetect_lb = 0;

int option_edit_right_extreme = 0;
int option_edit_left_extreme = 0;
Expand All @@ -114,6 +115,8 @@ const char VERTICAL_MAGIC[] = { '\1', '\1', '\1', '\1', '\n' };

#define space_width 1

#define DETECT_LB_TYPE_BUFLEN BUF_MEDIUM

/*** file scope type declarations ****************************************************************/

/*** file scope variables ************************************************************************/
Expand Down Expand Up @@ -378,6 +381,81 @@ check_file_access (WEdit * edit, const vfs_path_t * filename_vpath, struct stat

/* --------------------------------------------------------------------------------------------- */

/**
 * Detect the type of line breaks used in a buffer.
 *
 * Only the first @sz bytes of @p are examined. The buffer is read, never written, and no
 * byte at or beyond p[sz] is accessed, so the caller needs no spare room after the data.
 *
 * @param p  buffer holding the beginning of a file
 * @param sz number of valid bytes in @p; values <= 2 (including a negative value such as
 *           the result of a failed read) make the function give up
 *
 * @return LB_WIN if every line break seen is CR LF, LB_MAC if every line break seen is a
 *         lone CR; LB_ASIS in all other cases: too little data, mixed line break types,
 *         an LF CR sequence, a control character other than tab or form feed, no line
 *         break at all, or LF-only line breaks.
 */

static LineBreaks
detect_lb_type_buf (unsigned char *p, ssize_t sz)
{
    LineBreaks detected_lb = LB_ASIS;
    ssize_t i;

    /* If there was error or file too short, give up */
    if (sz <= 2)
        return LB_ASIS;

    /* A CR in the very last byte may be the first half of a CR LF pair that was cut by
       the end of the buffer, so leave it out of the scan instead of guessing */
    if (p[sz - 1] == '\r')
        sz--;

    for (i = 0; i < sz; i++)
    {
        /* Look-ahead byte; '\0' stands for "nothing follows within the scanned range" */
        const unsigned char next = (i + 1 < sz) ? p[i + 1] : '\0';
        LineBreaks new_lb = LB_ASIS;

        if (p[i] == '\r')
        {
            if (next == '\n')
            {
                /* CR LF is a single line break: consume the LF as well */
                i++;
                new_lb = LB_WIN;
            }
            else
                new_lb = LB_MAC;
        }
        else if (p[i] == '\n')
        {
            /* LF CR is anomaly for text file, give up */
            if (next == '\r')
                return LB_ASIS;
            new_lb = LB_UNIX;
        }
        else if (p[i] < 0x20 && p[i] != '\t' && p[i] != '\f')
        {
            /* The only common special char in text files is tab, much
               less commonly - form feed. Anything else - give up. */
            return LB_ASIS;
        }

        /* If we detected a new lb, and it doesn't match previously
           detected, give up */
        if (new_lb != LB_ASIS)
        {
            if (detected_lb != LB_ASIS && detected_lb != new_lb)
                return LB_ASIS;
            detected_lb = new_lb;
        }
    }

    /* LB_UNIX means that within buffer, we saw only LF breaks, but
       we cannot be sure about entire file. So, go conservative route
       and don't report to user in UI that this file has unix line
       breaks. */
    return detected_lb == LB_UNIX ? LB_ASIS : detected_lb;
}

/**
 * Detect the type of line breaks of a file by examining its beginning.
 *
 * Opens the file read-only, reads at most sizeof (buf) - 1 bytes, closes it and passes the
 * data to detect_lb_type_buf().
 *
 * @param filename_vpath path of the file to examine
 *
 * @return LB_ASIS if the file cannot be opened, otherwise whatever detect_lb_type_buf()
 *         reports for the bytes read (a failed read is passed on as a negative size).
 */
static LineBreaks
detect_lb_type (const vfs_path_t *filename_vpath)
{
    /* NOTE(review): DETECT_LB_TYPE_BUFLEN is defined in this file, yet this buffer is sized
       with BUF_LARGE - confirm which of the two is intended. */
    unsigned char buf[BUF_LARGE];
    int fd;                     /* a file descriptor is an int, not a byte count */
    ssize_t sz;

    fd = mc_open (filename_vpath, O_RDONLY | O_BINARY);
    if (fd == -1)
        return LB_ASIS;

    /* Keep one spare byte in the buffer after the data that was read */
    sz = mc_read (fd, buf, sizeof (buf) - 1);
    mc_close (fd);

    return detect_lb_type_buf (buf, sz);
}

/* --------------------------------------------------------------------------------------------- */
/**
* Open the file and load it into the buffers, either directly or using
* a filter. Return TRUE on success, FALSE on error.
Expand All @@ -394,6 +472,7 @@ static gboolean
edit_load_file (WEdit * edit)
{
gboolean fast_load = TRUE;
LineBreaks lb_type = LB_ASIS;

/* Cannot do fast load if a filter is used */
if (edit_find_filter (edit->filename_vpath) >= 0)
Expand All @@ -418,6 +497,11 @@ edit_load_file (WEdit * edit)
edit_clean (edit);
return FALSE;
}
if (option_autodetect_lb)
lb_type = detect_lb_type (edit->filename_vpath);

if (lb_type != LB_ASIS && lb_type != LB_UNIX)
fast_load = FALSE;
}
else
{
Expand All @@ -443,15 +527,15 @@ edit_load_file (WEdit * edit)
&& *(vfs_path_get_by_index (edit->filename_vpath, 0)->path) != '\0')
{
edit->undo_stack_disable = 1;
if (edit_insert_file (edit, edit->filename_vpath) < 0)
if (edit_insert_file (edit, edit->filename_vpath, lb_type) < 0)
{
edit_clean (edit);
return FALSE;
}
edit->undo_stack_disable = 0;
}
}
edit->lb = LB_ASIS;
edit->lb = lb_type;
return TRUE;
}

Expand Down Expand Up @@ -1783,7 +1867,7 @@ user_menu (WEdit * edit, const char *menu_file, int selected_entry)
{
off_t ins_len;

ins_len = edit_insert_file (edit, block_file_vpath);
ins_len = edit_insert_file (edit, block_file_vpath, LB_ASIS);
if (!nomark && ins_len > 0)
edit_set_markers (edit, start_mark, start_mark + ins_len, 0, 0);
}
Expand Down Expand Up @@ -1937,7 +2021,7 @@ is_break_char (char c)
/** inserts a file at the cursor, returns count of inserted bytes on success */

off_t
edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath)
edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath, LineBreaks lb_type)
{
char *p;
off_t current;
Expand Down Expand Up @@ -2027,7 +2111,19 @@ edit_insert_file (WEdit * edit, const vfs_path_t * filename_vpath)
while ((blocklen = mc_read (file, (char *) buf, TEMP_BUF_LEN)) > 0)
{
for (i = 0; i < blocklen; i++)
edit_insert (edit, buf[i]);
{
if (buf[i] == '\r')
{
if (lb_type == LB_MAC)
edit_insert (edit, '\n');
else if (lb_type == LB_WIN)
/* just skip */ ;
else
edit_insert (edit, '\r');
}
else
edit_insert (edit, buf[i]);
}
}
/* highlight inserted text then not persistent blocks */
if (!option_persistent_selections && edit->modified)
Expand Down
1 change: 1 addition & 0 deletions src/editor/edit.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ extern int option_auto_para_formatting;
extern int option_fill_tabs_with_spaces;
extern int option_return_does_auto_indent;
extern int option_backspace_through_tabs;
extern int option_autodetect_lb;
extern int option_fake_half_tabs;
extern int option_persistent_selections;
extern int option_drop_selection_on_copy;
Expand Down
12 changes: 7 additions & 5 deletions src/editor/editcmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,9 @@ edit_get_save_file_as (WEdit * edit)
{
char *fname;

edit->lb = cur_lb;
/* Don't change current LB type (possibly autodetected), unless user asked to. */
if (cur_lb != LB_ASIS)
edit->lb = cur_lb;
fname = tilde_expand (filename_res);
g_free (filename_res);
ret_vpath = vfs_path_from_str (fname);
Expand Down Expand Up @@ -3013,7 +3015,7 @@ edit_paste_from_X_buf_cmd (WEdit * edit)
/* try use external clipboard utility */
mc_event_raise (MCEVENT_GROUP_CORE, "clipboard_file_from_ext_clip", NULL);
tmp = mc_config_get_full_vpath (EDIT_CLIP_FILE);
ret = (edit_insert_file (edit, tmp) >= 0);
ret = (edit_insert_file (edit, tmp, LB_ASIS) >= 0);
vfs_path_free (tmp);

return ret;
Expand Down Expand Up @@ -3121,7 +3123,7 @@ edit_insert_file_cmd (WEdit * edit)
vfs_path_t *exp_vpath;

exp_vpath = vfs_path_from_str (exp);
ret = (edit_insert_file (edit, exp_vpath) >= 0);
ret = (edit_insert_file (edit, exp_vpath, LB_ASIS) >= 0);
vfs_path_free (exp_vpath);

if (!ret)
Expand Down Expand Up @@ -3199,7 +3201,7 @@ edit_sort_cmd (WEdit * edit)
vfs_path_t *tmp_vpath;

tmp_vpath = mc_config_get_full_vpath (EDIT_TEMP_FILE);
edit_insert_file (edit, tmp_vpath);
edit_insert_file (edit, tmp_vpath, LB_ASIS);
vfs_path_free (tmp_vpath);
}
return 0;
Expand Down Expand Up @@ -3246,7 +3248,7 @@ edit_ext_cmd (WEdit * edit)
vfs_path_t *tmp_vpath;

tmp_vpath = mc_config_get_full_vpath (EDIT_TEMP_FILE);
edit_insert_file (edit, tmp_vpath);
edit_insert_file (edit, tmp_vpath, LB_ASIS);
vfs_path_free (tmp_vpath);
}
return 0;
Expand Down
17 changes: 13 additions & 4 deletions src/editor/editdraw.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ status_string (WEdit * edit, char *s, int w)
{
char byte_str[16];

static const char *lb_names[LB_NAMES] = {
"",
"LF",
"CRLF",
"CR"
};

/*
* If we are at the end of file, print <EOF>,
* otherwise print the current character as is (if printable),
Expand Down Expand Up @@ -152,7 +159,7 @@ status_string (WEdit * edit, char *s, int w)
/* The field lengths just prevent the status line from shortening too much */
if (simple_statusbar)
g_snprintf (s, w,
"%c%c%c%c %3ld %5ld/%ld %6ld/%ld %s %s",
"%c%c%c%c %3ld %5ld/%ld %6ld/%ld %s %s %s",
edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-',
edit->modified ? 'M' : '-',
macro_index < 0 ? '-' : 'R',
Expand All @@ -164,10 +171,11 @@ status_string (WEdit * edit, char *s, int w)
#ifdef HAVE_CHARSET
mc_global.source_codepage >= 0 ? get_codepage_id (mc_global.source_codepage) :
#endif
"");
"",
lb_names[edit->lb]);
else
g_snprintf (s, w,
"[%c%c%c%c] %2ld L:[%3ld+%2ld %3ld/%3ld] *(%-4ld/%4ldb) %s %s",
"[%c%c%c%c] %2ld L:[%3ld+%2ld %3ld/%3ld] *(%-4ld/%4ldb) %s %s %s",
edit->mark1 != edit->mark2 ? (edit->column_highlight ? 'C' : 'B') : '-',
edit->modified ? 'M' : '-',
macro_index < 0 ? '-' : 'R',
Expand All @@ -181,7 +189,8 @@ status_string (WEdit * edit, char *s, int w)
#ifdef HAVE_CHARSET
mc_global.source_codepage >= 0 ? get_codepage_id (mc_global.source_codepage) :
#endif
"");
"",
lb_names[edit->lb]);
}

/* --------------------------------------------------------------------------------------------- */
Expand Down
2 changes: 2 additions & 0 deletions src/editor/editoptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ edit_options_dialog (WDialog * h)
QUICK_STOP_GROUPBOX,
QUICK_SEPARATOR (FALSE),
QUICK_SEPARATOR (FALSE),
QUICK_SEPARATOR (FALSE),
QUICK_START_GROUPBOX (N_("Tabulation")),
QUICK_CHECKBOX (N_("&Fake half tabs"), &option_fake_half_tabs, NULL),
QUICK_CHECKBOX (N_("&Backspace through tabs"), &option_backspace_through_tabs,
Expand All @@ -176,6 +177,7 @@ edit_options_dialog (WDialog * h)
QUICK_CHECKBOX (N_("&Group undo"), &option_group_undo, NULL),
QUICK_LABELED_INPUT (N_("Word wrap line length:"), input_label_left, wrap_length,
"edit-word-wrap", &p, NULL, FALSE, FALSE, INPUT_COMPLETE_NONE),
QUICK_CHECKBOX (N_("&Autodetect line breaks type"), &option_autodetect_lb, NULL),
QUICK_STOP_GROUPBOX,
QUICK_STOP_COLUMNS,
QUICK_BUTTONS_OK_CANCEL,
Expand Down
1 change: 1 addition & 0 deletions src/setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ static const struct
{ "editor_word_wrap_line_length", &option_word_wrap_line_length },
{ "editor_fill_tabs_with_spaces", &option_fill_tabs_with_spaces },
{ "editor_return_does_auto_indent", &option_return_does_auto_indent },
{ "editor_autodetect_linebreak", &option_autodetect_lb },
{ "editor_backspace_through_tabs", &option_backspace_through_tabs },
{ "editor_fake_half_tabs", &option_fake_half_tabs },
{ "editor_option_save_mode", &option_save_mode },
Expand Down
11 changes: 9 additions & 2 deletions tests/src/editor/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ AM_CPPFLAGS = \
-DTEST_SHARE_DIR=\"$(abs_srcdir)\" \
$(GLIB_CFLAGS) \
-I$(top_srcdir) \
-I$(top_srcdir)/src/editor \
@CHECK_CFLAGS@

AM_LDFLAGS = @TESTS_LDFLAGS@
Expand All @@ -17,13 +18,19 @@ if ENABLE_VFS_SMB
LIBS += $(top_builddir)/src/vfs/smbfs/helpers/libsamba.a
endif

EXTRA_DIST = mc.charsets test-data.txt.in
EXTRA_DIST = mc.charsets test-data.txt.in \
common_editor_includes.c

CLEANFILES = detect_linebreaks.log

TESTS = \
editcmd__edit_complete_word_cmd
editcmd__edit_complete_word_cmd \
detect_linebreaks

check_PROGRAMS = $(TESTS)

editcmd__edit_complete_word_cmd_SOURCES = \
editcmd__edit_complete_word_cmd.c

detect_linebreaks_SOURCES = \
detect_linebreaks.c
Loading