Skip to content

Commit

Permalink
Merge pull request #5 from Zaryob/dev
Browse files Browse the repository at this point in the history
Merge from "dev"
  • Loading branch information
Zaryob authored May 28, 2024
2 parents c929245 + 367a67e commit cff2737
Show file tree
Hide file tree
Showing 13 changed files with 186 additions and 15 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/iksemel_basic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
- name: Install build dependencies
run: |
sudo apt-get update
sudo apt-get install --yes -qq build-essential autoconf libtool git python3 python3-dev python3-pip python3-setuptools
sudo apt-get install --yes -qq build-essential autoconf libtool git python3 python3-dev python3-setuptools gnutls-dev gnutls-bin valgrind
- name: Install meson via pip
run: |
sudo pip3 install meson
Expand All @@ -28,6 +28,10 @@ jobs:
- name: Ninja tests
run: |
ninja test -C build
- name: Valgrind tests
run: |
cd build
meson test --wrap='valgrind --leak-check=full --error-exitcode=1' --no-rebuild
- name: Prepare artifacts
if: failure()
run: |
Expand All @@ -38,5 +42,6 @@ jobs:
name: Logs
path: |
/home/runner/work/iksemel/iksemel/build/meson-logs/meson-log.txt
/home/runner/work/iksemel/iksemel/build/meson-logs/testlog-valgrind.txt
if-no-files-found: ignore

5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,8 @@ src/iksemel.egg-info/dependency_links.txt
src/iksemel.egg-info/SOURCES.txt
src/iksemel.egg-info/PKG-INFO
src/iksemel.egg-info/top_level.txt

*.patch

# VSCode
.vscode/
2 changes: 1 addition & 1 deletion include/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ extern int errno;

#include "finetune.h"

#endif // __COMMON_H
#endif // __COMMON_H
2 changes: 1 addition & 1 deletion include/iksemel.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ enum ikserror {
};

enum ikstagtype {
IKS_OPEN,
IKS_OPEN = 0,
IKS_CLOSE,
IKS_SINGLE
};
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ pkgconf.set('VERSION', version)

pkg_install_dir = '@0@/pkgconfig'.format(get_option('libdir'))

configure_file(input : 'iksemel.pc.in'.format(version),
configure_file(input : 'iksemel.pc.in',
output : 'iksemel-@[email protected]'.format(version),
configuration : pkgconf,
install_dir : pkg_install_dir)
111 changes: 108 additions & 3 deletions src/iks.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
** modify it under the terms of GNU Lesser General Public License.
*/

#include <ctype.h>
#include <stdbool.h>
#include "common.h"
#include "iksemel.h"

Expand Down Expand Up @@ -562,6 +564,11 @@ iks_has_attribs (iks *x)
}

/***** Serializing *****/
/*
 * Tell whether byte c counts as printable for the serializer: anything
 * isprint() accepts, plus tab, line feed and carriage return.
 */
static bool
is_print(char c)
{
	/*
	 * The <ctype.h> classifiers are only defined for values representable
	 * as unsigned char (or EOF). Plain char may be signed, so a byte
	 * >= 0x80 would arrive as a negative int; convert it first.
	 */
	return isprint((unsigned char) c) || c == '\t' || c == '\n' || c == '\r';
}

static size_t
escape_size (char *src, size_t len)
Expand All @@ -573,6 +580,43 @@ escape_size (char *src, size_t len)
sz = 0;
for (i = 0; i < len; i++) {
c = src[i];


if( c < 0 || c > 127 ){

// Convert UTF-8 bytes to Unicode code point
unsigned char *ptr = (unsigned char *)(src+i);

if( *ptr >= 0x80 && *ptr <= 0x9F ){
continue;
}
else if ((*ptr & 0xE8) == 0xC0) {
sz += 2 * 1 + 4;
i++;
continue;
}
else if ((*ptr & 0xF0) == 0xE0) {
sz += 2 * 3 + 4;
i+=2;
continue;
}
else if ((*ptr & 0xF8) == 0xF0) {
sz += 2 * 4 + 4;
i+=3;
continue;
}
else {
sz += 6;
continue;
}

}

if (!is_print(c)) {
sz += 6;
continue;
}

switch (c) {
case '&': sz += 5; break;
case '\'': sz += 6; break;
Expand All @@ -597,11 +641,72 @@ static char *
escape (char *dest, char *src, size_t len)
{
char c;
int i;
int j = 0;

size_t i;
size_t j = 0;


for (i = 0; i < len; i++) {
c = src[i];

// handle non-ascii characters
if (!is_print(c)) {

if (i - j > 0) dest = my_strcat(dest, src + j, i - j);
j = i + 1;

char buf[13] = {0};

if( c < 0 || c > 127){
// Convert UTF-8 bytes to Unicode code point

unsigned int unicode_code_point = 0;
unsigned char *ptr = (unsigned char *)(src+i);

// Invalid characters
if( *ptr >= 0x80 && *ptr <= 0x9F ){
continue;
}
// 2 bytes
else if ((*ptr & 0xE8) == 0xC0) {
unicode_code_point = ((*ptr & 0x1F) << 6) | (*(ptr + 1) & 0x3F);
dest = my_strcat(dest, buf, snprintf(buf, sizeof buf, "&#x%02x;", unicode_code_point));
i++;
j = i + 1;
continue;
}
// 3 bytes
else if ((*ptr & 0xF0) == 0xE0) {
unicode_code_point = ((*ptr & 0x0F) << 12) | ((*(ptr + 1) & 0x3F) << 6) | (*(ptr + 2) & 0x3F);
dest = my_strcat(dest, buf, snprintf(buf, sizeof buf, "&#x%02x;", unicode_code_point));
i+=2;
j = i + 1;
continue;
}
// 4 bytes
else if ((*ptr & 0xF8) == 0xF0) {
unicode_code_point = ((*ptr & 0x07) << 18) | ((*(ptr + 1) & 0x3F) << 12) | ((*(ptr + 2) & 0x3F) << 6) | (*(ptr + 3) & 0x3F);
dest = my_strcat(dest, buf, snprintf(buf, sizeof buf, "&#x%02x;", unicode_code_point));
i+=3;
j = i + 1;
continue;
}
else {
dest = my_strcat(dest, buf, snprintf(buf, sizeof buf, "&#x%02x;", c));
continue;
}


}
else{
if(c != 0x00){
dest = my_strcat(dest, buf, snprintf(buf, sizeof buf, "&#x%02x;", c));
continue;
}
}

}

if ('&' == c || '<' == c || '>' == c || '\'' == c || '"' == c) {
if (i - j > 0) dest = my_strcat (dest, src + j, i - j);
j = i + 1;
Expand Down Expand Up @@ -632,7 +737,7 @@ iks_string (ikstack *s, iks *x)
if (s) {
return iks_stack_strdup (s, IKS_CDATA_CDATA (x), IKS_CDATA_LEN (x));
} else {
ret = iks_malloc (IKS_CDATA_LEN (x) + 1);
ret = iks_malloc (IKS_CDATA_LEN (x) + 1) ;
memcpy (ret, IKS_CDATA_CDATA (x), IKS_CDATA_LEN (x) + 1);
return ret;
}
Expand Down
7 changes: 5 additions & 2 deletions src/ikstack.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,28 @@
#include "common.h"
#include "iksemel.h"

struct align_test { char a; double b; };
typedef double align_type ;
struct align_test { char a; align_type b; };
#define DEFAULT_ALIGNMENT ((size_t) ((char *) &((struct align_test *) 0)->b - (char *) 0))
#define ALIGN_MASK ( DEFAULT_ALIGNMENT - 1 )
#define MIN_CHUNK_SIZE ( DEFAULT_ALIGNMENT * 8 )
#define MIN_ALLOC_SIZE DEFAULT_ALIGNMENT
#define ALIGN(x) ( (x) + (DEFAULT_ALIGNMENT - ( (x) & ALIGN_MASK)) )
#define ALIGN(x) ( ((x) + DEFAULT_ALIGNMENT - 1) & ~ALIGN_MASK )

/*
 * Header of one memory chunk, followed in the same object by its payload
 * bytes (data). Chunks link to one another through next.
 * NOTE(review): the exact meaning of size/used/last is defined by the
 * allocator code, which is not visible here -- confirm before relying on
 * the field notes below.
 */
typedef struct ikschunk_struct {
struct ikschunk_struct *next; // another chunk of the same kind (singly linked)
size_t size; // presumably the capacity of data in bytes -- TODO confirm
size_t used; // presumably the number of bytes of data already handed out -- TODO confirm
size_t last; // presumably the offset of the most recent allocation -- TODO confirm
// Zero-length array (GNU C extension): takes no storage but carries the
// alignment of align_type, so data starts on an align_type boundary and
// sizeof(ikschunk) is a multiple of that alignment.
align_type align[0] ; // Align data, and ensure struct size matches alignment
char data[4]; // start of the payload; declared with 4 bytes here
} ikschunk;

/*
 * Top-level bookkeeping record of an ikstack: a running byte total plus
 * two chunk pointers named meta and data.
 * NOTE(review): how meta and data chunks are used differently is decided
 * by the allocator functions, which are not visible here.
 */
struct ikstack_struct {
size_t allocated; // presumably the total number of bytes obtained so far -- TODO confirm
ikschunk *meta; // chunk (list) named "meta"; purpose set by the allocator code
ikschunk *data; // chunk (list) named "data"; purpose set by the allocator code
// Zero-length array (GNU C extension): adds no storage but gives the
// struct the alignment of align_type, padding its size to that multiple.
align_type align[0] ; // Ensure struct size matches alignment
};

static ikschunk *
Expand Down
2 changes: 1 addition & 1 deletion src/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ if backends.length() != 0

pkg_install_dir = '@0@/pkgconfig'.format(get_option('libdir'))

configure_file(input : 'jabber.pc.in'.format(version),
configure_file(input : 'jabber.pc.in',
output : 'jabber-@[email protected]'.format(version),
configuration : jabber_pkgconf,
install_dir : pkg_install_dir)
Expand Down
2 changes: 1 addition & 1 deletion src/sax.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ stack_expand (iksparser *prs, int len)
prs->stack_max = need;
prs->tag_name += diff;
if (prs->attflag != 0) {
int i = 0;
unsigned int i = 0;
while (i < (prs->attmax * 2)) {
if (prs->atts[i]) prs->atts[i] += diff;
i++;
Expand Down
4 changes: 2 additions & 2 deletions src/utility.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ char *
iks_escape (ikstack *s, char *src, size_t len)
{
char *ret;
int i, j, nlen;
size_t i, j, nlen;

if (!src || !s) return NULL;
if (len == -1) len = strlen (src);
Expand Down Expand Up @@ -138,7 +138,7 @@ iks_escape (ikstack *s, char *src, size_t len)
char *
iks_unescape (ikstack *s, char *src, size_t len)
{
int i,j;
size_t i,j;
char *ret;

if (!s || !src) return NULL;
Expand Down
5 changes: 5 additions & 0 deletions test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ iks_test = executable('test-iks', 'tst-iks.c',
include_directories : include_dir,
link_with : libiksemel)

iks_test_utf8 = executable('test-iks-utf8', 'tst-iks-utf8.c',
include_directories : include_dir,
link_with : libiksemel)

ikstack_test = executable('test-ikstack', 'tst-ikstack.c',
include_directories : include_dir,
link_with : libiksemel)
Expand All @@ -24,6 +28,7 @@ sha_test = executable('test-sha', 'tst-sha.c',

test('Iksemel Test Suite - DOM Test', dom_test, is_parallel : true)
test('Iksemel Test Suite - IKS Test', iks_test, is_parallel : true)
test('Iksemel Test Suite - IKS UTF-8 Test', iks_test_utf8, is_parallel : true)
test('Iksemel Test Suite - IkStack Test', ikstack_test, is_parallel : true)
test('Iksemel Test Suite - MD5 Test', md5_test, is_parallel : true)
test('Iksemel Test Suite - SAX Test', sax_test, is_parallel : true)
Expand Down
48 changes: 48 additions & 0 deletions test/tst-iks-utf8.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/* iksemel (XML parser for Jabber)
** Copyright (C) 2000-2003 Gurer Ozen
** This code is free software; you can redistribute it and/or
** modify it under the terms of GNU Lesser General Public License.
*/

#include <stdio.h>
#include <string.h>
#include <locale.h>

#include "iksemel.h"

/*
 * Serialization test for non-ASCII and non-printable character data.
 *
 * Builds a <test> document whose children carry multi-byte UTF-8 text,
 * stray continuation bytes, a NUL byte and control characters, serializes
 * it with iks_string() and compares the result with the expected markup,
 * in which those bytes appear as numeric character references.
 *
 * Returns 0 when the serialized text matches, 1 otherwise.
 */
int main (int argc, char *argv[])
{
	/* The command line is not used. */
	(void) argc;
	(void) argv;

	setlocale (LC_ALL, "");
	static char xml[] = "<test>"
		"<text>Hello, &#x4e16;&#x754c;</text>"
		"<emoji>&#x1f4a9;</emoji>"
		"<invalid></invalid>"
		"<null></null>"
		"<twoun>&#xc5;</twoun>"
		"<threeun>&#x100;</threeun>"
		"<katana>&#x30bb;</katana>"
		"<wideunicode>&#x26007;</wideunicode>"
		"<nonprint>&#x01;&#x07;&#x0b;&#x7f;</nonprint>"
		"</test>";
	int rc = 0;

	/*
	 * Every length below is the byte length of the UTF-8 payload without
	 * the terminating NUL, so only the "null" case feeds a NUL byte to the
	 * serializer on purpose.
	 */
	iks *x = iks_new ("test");
	iks_insert_cdata (iks_insert (x, "text"), "Hello, 世界", 13);
	iks_insert_cdata (iks_insert (x, "emoji"), "\U0001F4A9", 4);
	iks_insert_cdata (iks_insert (x, "invalid"), "\x80\x81", 2);
	iks_insert_cdata (iks_insert (x, "null"), "\0", 1);
	iks_insert_cdata (iks_insert (x, "twoun"), "Å", 2);      /* U+00C5: 2 bytes */
	iks_insert_cdata (iks_insert (x, "threeun"), "Ā", 2);    /* U+0100: 2 bytes */
	iks_insert_cdata (iks_insert (x, "katana"), "セ", 3);     /* U+30BB: 3 bytes */
	iks_insert_cdata (iks_insert (x, "wideunicode"), "\U00026007", 4);
	iks_insert_cdata (iks_insert (x, "nonprint"), "\x1\a\v\x7F", 4);

	char *t = iks_string (iks_stack (x), x);
	if (!t || strcmp (t, xml) != 0) {
		/* Passing NULL for %s is undefined behavior, so substitute a marker. */
		printf ("Result: %s\n", t ? t : "(null)");
		printf ("Expected: %s\n", xml);
		rc = 1;
	}

	/*
	 * Release the tree on both paths so leak checkers stay quiet on failure.
	 * t was requested from the stack of x, so it is only used before this call.
	 */
	iks_delete (x);

	return rc;
}
4 changes: 2 additions & 2 deletions test/tst-sax.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ tagHook (void *udata, char *name, char **atts, int type)

if (!tester.cur) TAG_FAIL;
if (tester.cur->type != IKS_TAG) TAG_FAIL;
if (tester.cur->tag != type) TAG_FAIL;
if ((int)tester.cur->tag != type) TAG_FAIL;
if (iks_strcmp (tester.cur->name, name) != 0) TAG_FAIL;
if (!atts && tester.cur->nr_atts > 0) TAG_FAIL;
if (atts && tester.cur->nr_atts == 0) TAG_FAIL;
Expand All @@ -177,7 +177,7 @@ tagHook (void *udata, char *name, char **atts, int type)
void
debug_cdata (char *data, size_t len, int pos)
{
int i;
size_t i;

PRINT_TEST;
if (tester.cur && tester.cur->type == IKS_CDATA)
Expand Down

0 comments on commit cff2737

Please sign in to comment.