Skip to content

Commit 3a761d7

Browse files
committed
Add support for annotation authors
1 parent d78e82a commit 3a761d7

File tree

8 files changed

+106
-3
lines changed

8 files changed

+106
-3
lines changed

catdoc.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,14 @@ func GetCommentsFromFile(file io.ReadSeeker) (string, error) {
102102
return callWASMFuncWithFile("get_comments", file)
103103
}
104104

105+
func GetAnnotationAuthorsFromFile(file io.ReadSeeker) ([]string, error) {
106+
r, err := callWASMFuncWithFile("get_annotation_authors", file)
107+
if err != nil {
108+
return nil, err
109+
}
110+
return strings.Split(r, "\n"), nil
111+
}
112+
105113
func GetVersion() (string, error) {
106114
return callWASMFunc("get_version", nil)
107115
}

catdoc.wasm

662 Bytes
Binary file not shown.

catdoc/src/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ catdoc-wasm: emsdk.uptodate
105105
-I. -O3 -DHAVE_CONFIG_H \
106106
-DCATDOC_VERSION=\"0.95\" \
107107
-DCHARSETPATH=\"charsets\" \
108-
-sEXPORTED_FUNCTIONS=_get_text,_get_author,_get_last_author,_get_version,_get_title,_get_subject,_get_keywords,_get_comments\
108+
-sEXPORTED_FUNCTIONS=_get_text,_get_author,_get_last_author,_get_version,_get_title,_get_subject,_get_keywords,_get_comments,_get_annotation_authors\
109109
-sSTANDALONE_WASM -sWARN_ON_UNDEFINED_SYMBOLS=0 \
110110
--no-entry -sFILESYSTEM=1 -sALLOW_MEMORY_GROWTH -sMAXIMUM_MEMORY=1GB
111111

catdoc/src/analyze.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ extern char ole_sign[], zip_sign[]; /* from ole.c */
1818
/* Signature byte strings for individual file formats.
 * NOTE(review): presumably compared against the first bytes of the input
 * while detecting its format; the code using them is not visible here. */
char rtf_sign[] = "{\\rtf";
1919
char old_word_sign[] = {0xdb, 0xa5, 0};
2020
char write_sign[] = {0x31, 0xBE, 0};
21+
/* 18-byte pattern that read_annotation_authors() searches the file for.
 * Read as little-endian 16-bit units it is 0x0000, 0x0007 and then the seven
 * UTF-16 code units of "Unknown".
 * NOTE(review): this looks like the cbExtra field plus the first
 * ("Unknown") entry of the MS-DOC SttbfRMark string table - confirm against
 * the format specification. */
unsigned char sttbfRMark[] = {0x00, 0x00, 0x07, 0x00, 0x55, 0x00,
22+
0x6E, 0x00, 0x6B, 0x00, 0x6E, 0x00,
23+
0x6F, 0x00, 0x77, 0x00, 0x6E, 0x00};
2124
/* Verbosity flag, off by default. */
int verbose = 0;
2225

2326
unsigned char *read_metadata(unsigned char *buffer, metadata metadata_type) {
@@ -38,13 +41,75 @@ unsigned char *read_metadata(unsigned char *buffer, metadata metadata_type) {
3841
return NULL;
3942
}
4043

44+
/*********************************************************************
 * Searches stream f, starting at its current position, for the first
 * occurrence of the byte sequence mark[0 .. mark_length-1].
 *
 * Returns the offset of the match, counted in bytes from the position
 * the stream had when the function was called (i.e. the absolute file
 * offset when f was positioned at the start). An empty pattern
 * (mark_length <= 0) matches immediately and yields 0.
 *
 * If the pattern is not found before end of file or a read error, a
 * message is written to stderr and the process exits with status 1.
 *
 * The stream is read in 25 KiB blocks. The last mark_length-1 bytes of
 * every block are carried over in front of the next one, so a pattern
 * that straddles a block boundary is still found and back-tracking
 * after a partial match never leaves the buffer.
 ********************************************************************/
int find_offset(FILE *f, unsigned char *mark, int mark_length) {
  enum { BLOCK_SIZE = 1024 * 25 };

  if (mark_length <= 0) {
    return 0;
  }

  const size_t mark_len = (size_t)mark_length;
  const size_t carry = mark_len - 1; /* bytes kept between blocks */
  unsigned char buf[BLOCK_SIZE + carry];
  size_t filled = 0; /* number of valid bytes in buf */
  long base = 0;     /* stream offset of buf[0] */
  size_t n;

  /* Loop on the fread() result rather than feof(): a read error returns 0
   * without setting the EOF flag and would otherwise spin forever. */
  while ((n = fread(buf + filled, 1, BLOCK_SIZE, f)) > 0) {
    filled += n;
    if (filled < mark_len) {
      continue; /* not enough data for a full match yet */
    }

    size_t last_start = filled - mark_len;
    for (size_t i = 0; i <= last_start; i++) {
      size_t k = 0;
      while (k < mark_len && buf[i + k] == mark[k]) {
        k++;
      }
      if (k == mark_len) {
        return (int)(base + (long)i);
      }
    }

    /* Every start position up to last_start has been rejected; keep only
     * the tail that could still begin a match completed by the next block.
     * consumed >= 1, so the forward copy never overlaps destructively. */
    size_t consumed = filled - carry;
    for (size_t k = 0; k < carry; k++) {
      buf[k] = buf[consumed + k];
    }
    base += (long)consumed;
    filled = carry;
  }

  fprintf(stderr, "stttbfRMark offset is not found");
  exit(1);
}
74+
75+
void read_annotation_authors(FILE *f) {
76+
int offset = find_offset(f, sttbfRMark, 18) - 4;
77+
fseek(f, offset, SEEK_SET);
78+
79+
int block_size = 1024;
80+
unsigned char buf[2];
81+
fread(buf, 1, 2, f);
82+
if (buf[0] != 0xff || buf[1] != 0xff) {
83+
fprintf(stderr, "stttbfRMark offset is invalid");
84+
exit(1);
85+
}
86+
fread(buf, 1, 2, f);
87+
unsigned int count = getshort(buf, 0) - 1;
88+
fseek(f, 18, SEEK_CUR);
89+
for (int i = 0; i < count; i++) {
90+
fread(buf, 1, 2, f);
91+
unsigned int str_len = getshort(buf, 0);
92+
unsigned short *str = calloc(str_len, 2);
93+
fread(str, 2, str_len, f);
94+
for (int j = 0; j < str_len; j++) {
95+
printf("%lc", str[j]);
96+
}
97+
printf("\n");
98+
free(str);
99+
}
100+
}
101+
41102
/*********************************************************************
42103
* Determines format of input file and calls parse_word_header or
43104
* process_file if
44105
* it is word processor file or copy_out if it is plain text file
45106
* return not 0 when error
46107
********************************************************************/
47108
int analyze_format(FILE *f, metadata metadata_type) {
109+
if (metadata_type == annotation_authors) {
110+
read_annotation_authors(f);
111+
return 0;
112+
}
48113
unsigned char buffer[129];
49114
long offset = 0;
50115
FILE *new_file, *ole_file;

catdoc/src/catdoc.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ int main(int argc, char **argv) {
5656
get_locale_charset();
5757
#endif
5858
metadata metadata_type = none;
59-
while ((c = getopt(argc, argv, "Vls:d:f:taubxv8wALTSKCm:")) != -1) {
59+
while ((c = getopt(argc, argv, "Vls:d:f:taubxv8wALTSKCUm:")) != -1) {
6060
switch (c) {
6161
case 's':
6262
check_charset(&source_csname, optarg);
@@ -104,6 +104,9 @@ int main(int argc, char **argv) {
104104
case 'C':
105105
metadata_type = comments;
106106
break;
107+
case 'U':
108+
metadata_type = annotation_authors;
109+
break;
107110
case 'm': {
108111
char *endptr;
109112
wrap_margin = (int)strtol(optarg, &endptr, 0);
@@ -246,3 +249,8 @@ void get_comments() {
246249
char *args[] = {"", "-C", "/input_file/file.doc"};
247250
main(3, args);
248251
}
252+
253+
/* Exported entry point: runs main() with the argument vector
 * {"", "-U", "/input_file/file.doc"}, i.e. with the -U option applied to
 * the fixed input path. */
void get_annotation_authors() {
  char *argv[] = {"", "-U", "/input_file/file.doc"};
  int argc = (int)(sizeof(argv) / sizeof(argv[0]));

  main(argc, argv);
}

catdoc/src/catdoc.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ void get_title();
227227
void get_subject();
228228
void get_keywords();
229229
void get_comments();
230+
void get_annotation_authors();
230231

231232
char *find_file(char *name, const char *path);
232233
char *stradd(const char *s1, const char *s2);
@@ -255,7 +256,8 @@ typedef enum {
255256
title,
256257
subject,
257258
keywords,
258-
comments
259+
comments,
260+
annotation_authors
259261
} metadata;
260262
int analyze_format(FILE *f, metadata metadata_type);
261263
void list_charsets(void);

catdoc_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,22 @@ func testFileFunc(title, expected string, fun func(io.ReadSeeker) (string, error
3131
}
3232
}
3333

34+
// testFileFuncArr opens test.doc, passes it to fun and fails the test unless
// the returned slice starts with the elements of expected, in order.
//
// Only the leading len(expected) elements are compared; any further elements
// returned by fun are ignored. title names the checked value in the failure
// message. The document is closed before the helper returns.
func testFileFuncArr(title string, expected []string, fun func(io.ReadSeeker) ([]string, error), t *testing.T) {
	t.Helper()

	f, err := os.Open("test.doc")
	if err != nil {
		t.Fatalf("could not open test document, %v", err)
	}
	// Runs on t.Fatalf as well, because Fatalf exits via runtime.Goexit.
	defer f.Close()

	arr, err := fun(f)
	if err != nil {
		t.Fatalf("expected error to be nil, got %v", err)
	}
	// Report a short result as a test failure instead of panicking with an
	// index-out-of-range in the loop below.
	if len(arr) < len(expected) {
		t.Fatalf("expected %s to be \"%v\", got %v", title, expected, arr)
	}
	for i := range expected {
		if arr[i] != expected[i] {
			t.Fatalf("expected %s to be \"%v\", got %v", title, expected, arr)
		}
	}
}
49+
3450
func TestGetTextFromFile(t *testing.T) {
3551
testFileFunc("text", "text-inside-doc", gocatdoc.GetTextFromFile, t)
3652
}
@@ -58,3 +74,7 @@ func TestGetKeywordsFromFile(t *testing.T) {
5874
func TestGetCommentsFromFile(t *testing.T) {
5975
testFileFunc("comments", "Comments", gocatdoc.GetCommentsFromFile, t)
6076
}
77+
78+
func TestGetAnnotationAuthorsFromFile(t *testing.T) {
79+
testFileFuncArr("annoation_authors", []string{"H. Potter"}, gocatdoc.GetAnnotationAuthorsFromFile, t)
80+
}

test.doc

1 KB
Binary file not shown.

0 commit comments

Comments
 (0)