From f81b4538a537c4160810b360331834978fd85b09 Mon Sep 17 00:00:00 2001 From: "Kirill Lipatov (Leency)" Date: Tue, 29 Dec 2020 10:31:45 +0000 Subject: [PATCH] WebView 3.29: reworked encodings detection git-svn-id: svn://kolibrios.org@8492 a494cfbc-eb01-0410-851d-a64ba20cac60 --- programs/cmm/browser/TWB/TWB.c | 51 +++++----- programs/cmm/browser/TWB/render.h | 34 +++---- programs/cmm/browser/TWB/set_style.h | 141 +++++++++++++++++---------- programs/cmm/browser/WebView.c | 18 ++-- programs/cmm/browser/cache.h | 6 +- programs/cmm/browser/const.h | 16 +-- programs/cmm/browser/tabs.h | 2 + 7 files changed, 160 insertions(+), 108 deletions(-) diff --git a/programs/cmm/browser/TWB/TWB.c b/programs/cmm/browser/TWB/TWB.c index 9a2ffdc82d..b4116354e0 100644 --- a/programs/cmm/browser/TWB/TWB.c +++ b/programs/cmm/browser/TWB/TWB.c @@ -46,7 +46,7 @@ struct TWebBrowser { dword is_html; collection img_url; char header[150]; - char line[500]; + char linebuf[500]; char redirect[URL_SIZE]; void SetStyle(); @@ -76,6 +76,7 @@ struct TWebBrowser { void tag_iframe(); void tag_title(); void tag_font(); + void tag_table_reset(); void tag_table(); void tag_td(); void tag_tr(); @@ -100,21 +101,13 @@ void TWebBrowser::SetPageDefaults() bg_colors.add(DEFAULT_BG_COL); canvas.Fill(0, DEFAULT_BG_COL); header = NULL; - cur_encoding = CH_CP866; draw_y = BODY_MARGIN; draw_x = left_gap = BODY_MARGIN; draw_w = list.w - BODY_MARGIN; - line = 0; + linebuf = 0; redirect = '\0'; - //hold original buffer - if (o_bufpointer) o_bufpointer=free(o_bufpointer); - o_bufpointer = malloc(bufsize); - memmov(o_bufpointer, bufpointer, bufsize); - if (custom_encoding != -1) { - cur_encoding = custom_encoding; - bufpointer = ChangeCharset(cur_encoding, "CP866", bufpointer); - } list.SetFont(8, 14, 10011000b); + tag_table_reset(); } //============================================================================================ void TWebBrowser::ParseHtml(dword _bufpointer, _bufsize){ @@ -124,12 +117,25 @@ void TWebBrowser::ParseHtml(dword _bufpointer, _bufsize){ if (list.w!=canvas.bufw) canvas.Init(list.x, list.y, list.w, 400*20); - if (bufpointer != _bufpointer) { + if (bufpointer == _bufpointer) { + custom_encoding = cur_encoding; + } else { bufpointer = malloc(bufsize); memmov(bufpointer, _bufpointer, bufsize); - } else { - custom_encoding = CH_CP866; + + //hold original buffer + o_bufpointer = malloc(bufsize); + memmov(o_bufpointer, bufpointer, bufsize); + + cur_encoding = CH_CP866; + if (custom_encoding != -1) { + cur_encoding = custom_encoding; + bufpointer = ChangeCharset(cur_encoding, "CP866", bufpointer); + bufsize = strlen(bufpointer); + } } + + SetPageDefaults(); is_html = true; if (!strstri(bufpointer, "= draw_w) RenderTextbuf(); } //============================================================================================ diff --git a/programs/cmm/browser/TWB/render.h b/programs/cmm/browser/TWB/render.h index 6c955e5d18..76979d5498 100644 --- a/programs/cmm/browser/TWB/render.h +++ b/programs/cmm/browser/TWB/render.h @@ -9,20 +9,20 @@ void TWebBrowser::RenderLine() if (style.title) { - strncpy(#header, #line, sizeof(TWebBrowser.header)-1); + strncpy(#header, #linebuf, sizeof(TWebBrowser.header)-1); strncat(#header, " - ", sizeof(TWebBrowser.header)-1); strncat(#header, #version, sizeof(TWebBrowser.header)-1); - line = 0; + linebuf = 0; return; } if (t_html) && (!t_body) { - line = 0; + linebuf = 0; return; } - if (line) + if (linebuf) { - pw = strlen(#line) * list.font_w; + pw = strlen(#linebuf) * list.font_w; zoom = list.font_w / BASIC_CHAR_W; style.cur_line_h = math.max(style.cur_line_h, list.item_h); @@ -42,19 +42,19 @@ void TWebBrowser::RenderLine() pc = text_colors.get_last(); if (link) && (pc == text_colors.get(0)) pc = link_color_default; - canvas.WriteText(draw_x, draw_y, list.font_type, pc, #line, NULL); - if (style.b) canvas.WriteText(draw_x+1, draw_y, list.font_type, pc, #line, NULL); + canvas.WriteText(draw_x, draw_y, list.font_type, pc, #linebuf, NULL); + if (style.b) canvas.WriteText(draw_x+1, draw_y, list.font_type, pc, #linebuf, NULL); if (style.s) canvas.DrawBar(draw_x, list.item_h / 2 - zoom + draw_y, pw, zoom, pc); if (style.u) canvas.DrawBar(draw_x, list.item_h - zoom - zoom + draw_y, pw, zoom, pc); if (link) { - if (line[0]==' ') && (line[1]==NULL) {} else { + if (linebuf[0]==' ') && (linebuf[1]==NULL) {} else { canvas.DrawBar(draw_x, draw_y + list.item_h - calc(zoom*2)-1, pw, zoom, link_color_default); links.add_text(draw_x, draw_y + list.y, pw, list.item_h - calc(zoom*2)-1, zoom); } } draw_x += pw; - if (debug_mode) debugln(#line); - line = NULL; + if (debug_mode) debugln(#linebuf); + linebuf = NULL; } } @@ -65,26 +65,26 @@ void TWebBrowser::RenderTextbuf() int zoom = list.font_w / BASIC_CHAR_W; //Do we need a line break? - while (strlen(#line) * list.font_w + draw_x >= draw_w) { + while (strlen(#linebuf) * list.font_w + draw_x >= draw_w) { //Yes, we do. Lets calculate where... - break_pos = strrchr(#line, ' '); + break_pos = strrchr(#linebuf, ' '); //Is a new line fits in the current line? if (break_pos * list.font_w + draw_x > draw_w) { break_pos = draw_w - draw_x /list.font_w; - while(break_pos) && (line[break_pos]!=' ') break_pos--; + while(break_pos) && (linebuf[break_pos]!=' ') break_pos--; } //Maybe a new line is too big for the whole new line? Then we have to split it - if (!break_pos) && (style.tag_list.level*5 + strlen(#line) * zoom >= list.column_max) { + if (!break_pos) && (style.tag_list.level*5 + strlen(#linebuf) * zoom >= list.column_max) { break_pos = draw_w - draw_x / list.font_w; } - strcpy(#next_line, #line + break_pos); - line[break_pos] = 0x00; + strcpy(#next_line, #linebuf + break_pos); + linebuf[break_pos] = 0x00; RenderLine(); - strcpy(#line, #next_line); + strcpy(#linebuf, #next_line); NewLine(); } RenderLine(); diff --git a/programs/cmm/browser/TWB/set_style.h b/programs/cmm/browser/TWB/set_style.h index 39b0a1cacf..705bed90cf 100644 --- a/programs/cmm/browser/TWB/set_style.h +++ b/programs/cmm/browser/TWB/set_style.h @@ -104,11 +104,11 @@ void TWebBrowser::tag_iframe() { if (tag.get_value_of("src")) { NewLine(); - strcpy(#line, "IFRAME: "); + strcpy(#linebuf, "IFRAME: "); RenderTextbuf(); link=true; links.add_link(tag.value); - strncpy(#line, tag.value, sizeof(TWebBrowser.line)-1); + strncpy(#linebuf, tag.value, sizeof(TWebBrowser.linebuf)-1); RenderTextbuf(); link=false; NewLine(); @@ -137,17 +137,25 @@ void TWebBrowser::tag_meta_xml() EDX = strrchr(tag.value, '=') + tag.value; //search in content= if (ESBYTE[EDX] == '"') EDX++; strlwr(EDX); - if (streqrp(EDX,"utf-8")) || (streqrp(EDX,"utf8")) ChangeEncoding(CH_UTF8); - else if (streqrp(EDX,"windows-1251")) || (streqrp(EDX,"windows1251")) ChangeEncoding(CH_CP1251); - else if (streqrp(EDX,"dos")) || (streqrp(EDX,"cp-866")) ChangeEncoding(CH_CP866); - else if (streqrp(EDX,"iso-8859-5")) || (streqrp(EDX,"iso8859-5")) ChangeEncoding(CH_ISO8859_5); - else if (streqrp(EDX,"koi8-r")) || (streqrp(EDX,"koi8-u")) ChangeEncoding(CH_KOI8); + EAX = get_encoding_type_by_name(EDX); + if (EAX!=-1) ChangeEncoding(EAX); } if (streq(tag.get_value_of("http-equiv"), "refresh")) && (tag.get_value_of("content")) { if (tag.value = strstri(tag.value, "url")) strcpy(#redirect, tag.value); } } +signed int get_encoding_type_by_name(dword name) +{ + EDX = name; + if (streqrp(EDX,"utf-8")) || (streqrp(EDX,"utf8")) return CH_UTF8; + else if (streqrp(EDX,"windows-1251")) || (streqrp(EDX,"windows1251")) return CH_CP1251; + else if (streqrp(EDX,"dos")) || (streqrp(EDX,"cp-866")) return CH_CP866; + else if (streqrp(EDX,"iso-8859-5")) || (streqrp(EDX,"iso8859-5")) return CH_ISO8859_5; + else if (streqrp(EDX,"koi8-r")) || (streqrp(EDX,"koi8-u")) return CH_KOI8; + return -1; +} + void TWebBrowser::tag_code() { if (style.pre = tag.opened) { @@ -178,12 +186,12 @@ void TWebBrowser::tag_li() if (!style.tag_list.level) style.tag_list.upd_level(1, 'u'); if (!style.pre) NewLine(); if (style.tag_list.order_type() == 'u') { - strcpy(#line, "\31 "); + strcpy(#linebuf, "\31 "); draw_x = style.tag_list.level * 5 - 2 * list.font_w + left_gap; } if (style.tag_list.order_type() == 'o') { - sprintf(#line, "%i. ", style.tag_list.inc_counter()); - draw_x = style.tag_list.level * 5 - 1 - strlen(#line) * list.font_w + left_gap; + sprintf(#linebuf, "%i. ", style.tag_list.inc_counter()); + draw_x = style.tag_list.level * 5 - 1 - strlen(#linebuf) * list.font_w + left_gap; } } } @@ -217,7 +225,7 @@ void TWebBrowser::tag_body() void TWebBrowser::tag_q() { - chrncat(#line, '\"', sizeof(TWebBrowser.line)); + chrncat(#linebuf, '\"', sizeof(TWebBrowser.linebuf)); } void TWebBrowser::tag_h1234_caption() @@ -292,14 +300,14 @@ IMGOK: NOIMG: if (tag.get_value_of("title")) || (tag.get_value_of("alt")) { - strncpy(#img_path, tag.value, sizeof(TWebBrowser.line)-3); - sprintf(#line, "[%s]", #img_path); + strncpy(#img_path, tag.value, sizeof(TWebBrowser.linebuf)-3); + sprintf(#linebuf, "[%s]", #img_path); } else { if (streqrp(#img_path, "data:")) img_path=0; replace_char(#img_path, '?', NULL, strlen(#img_path)); - img_path[sizeof(TWebBrowser.line)-3] = '\0'; //prevent overflow in sprintf - sprintf(#line, "[%s]", #img_path+strrchr(#img_path, '/')); - line[50]= NULL; + img_path[sizeof(TWebBrowser.linebuf)-3] = '\0'; //prevent overflow in sprintf + sprintf(#linebuf, "[%s]", #img_path+strrchr(#img_path, '/')); + linebuf[50]= NULL; } text_colors.add(0x9A6F29); style.image = true; @@ -310,56 +318,83 @@ NOIMG: -unsigned tr_y; -unsigned td_x, td_w; -unsigned highest_td; -int table_c=0; + + + +struct TABLE { + int count; + int col; + collection_int cx; + int row_y, next_row_y; +} table; + +void TWebBrowser::tag_table_reset() +{ + table.count = 0; + table.cx.drop(); + table.row_y = 0; + table.next_row_y = 0; +} void TWebBrowser::tag_table() { - if (tag.opened) table_c++; else table_c--; - if (!tag.opened) NewLine(); + if (tag.opened) { + if (!table.count) { + table.next_row_y = table.row_y = draw_y; + } + table.count++; + } else { + table.count--; + if (!table.count) { + draw_y = math.max(draw_y + style.cur_line_h, table.next_row_y); + left_gap = BODY_MARGIN; + } + } } void TWebBrowser::tag_tr() { - if (table_c>1) return; - //style.tr = tag.opened; - NewLine(); - draw_w = list.w - left_gap; - left_gap = BODY_MARGIN; - td_w = 0; if (tag.opened) { - tr_y = draw_y; - } else { - //draw_y = highest_td; + if (table.count>1) { + NewLine(); + } else { + table.col = 0; + table.row_y = math.max(draw_y + style.cur_line_h, table.next_row_y); + left_gap = BODY_MARGIN; + NewLine(); + } } } void TWebBrowser::tag_td() { - if (table_c>1) return; + if (table.count>1) return; if (tag.opened) { - NewLine(); - //highest_td = math.max(draw_y, tr_y); - if (tag.get_value_of("width")) td_w = atoi(tag.value); - draw_y = tr_y; - draw_x = left_gap; - debugval("td_w", td_w); - if (td_w > 20) draw_w = td_w; else draw_w = list.w - left_gap; - } else { - left_gap += td_w; - draw_w = list.w - left_gap; - /* - draw_w -= left_gap; - if (left_gap < 0) left_gap = BODY_MARGIN; - if (draw_w < 0) { - left_gap = BODY_MARGIN; - draw_w = list.w - left_gap; + table.next_row_y = math.max(draw_y + style.cur_line_h, table.next_row_y); + draw_y = table.row_y; + table.cx.set(table.col, math.max(draw_x,table.cx.get(table.col)) ); + draw_x = left_gap = table.cx.get(table.col); + table.col++; + if (tag.get_value_of("width")) { + draw_w = EAX; + //debugval("draw_w", atoi(tag.value)); + table.cx.set(table.col, draw_x + atoi(tag.value)); } - debugval("left_gap", left_gap); - */ + //if (tag.get_value_of("height")) table.next_row_y = draw_y + atoi(tag.value); + } + + if (left_gap >= list.w - list.font_w - 10) { + notify("left_gap overflow"); + draw_x = left_gap = BODY_MARGIN; + table.cx.drop(); + table.count = 999; + NewLine(); } - debugval("td_w", td_w); - debugval("left_gap", left_gap); -} \ No newline at end of file + + if (draw_w < 0) || (draw_w >= list.w) { + notify("draw_w overflow"); + draw_x = left_gap = BODY_MARGIN; + draw_w = list.w - left_gap; + NewLine(); + } +} diff --git a/programs/cmm/browser/WebView.c b/programs/cmm/browser/WebView.c index 81156c2742..ad6345005c 100644 --- a/programs/cmm/browser/WebView.c +++ b/programs/cmm/browser/WebView.c @@ -231,7 +231,7 @@ void main() if (http_get_type==PAGE) { history.add(http.cur_url); if (!strchr(http.cur_url, '?')) { - cache.add(http.cur_url, http.content_pointer, http.content_received, PAGE); + cache.add(http.cur_url, http.content_pointer, http.content_received, PAGE, WB1.custom_encoding); } LoadInternalPage(http.content_pointer, http.content_received); free(http.content_pointer); @@ -239,9 +239,9 @@ void main() else if (http_get_type==IMG) { _IMG_RES: if (http.status_code >= 200) && (http.status_code < 300) { - cache.add(cur_img_url, http.content_pointer, http.content_received, IMG); + cache.add(cur_img_url, http.content_pointer, http.content_received, IMG, NULL); } else { - cache.add(cur_img_url, 0, 0, IMG); + cache.add(cur_img_url, 0, 0, IMG, NULL); } free(http.content_pointer); GetImg(false); @@ -271,7 +271,7 @@ void ProcessButtonClick(dword id__) case 1: ExitProcess(); case TAB_CLOSE_ID...TAB_CLOSE_ID+TABS_MAX: EventTabClose(id__ - TAB_CLOSE_ID); return; case TAB_ID...TAB_ID+TABS_MAX: EventAllTabsClick(id__ - TAB_ID); return; - case ENCODINGS...ENCODINGS+6: EventChangeEncodingAndLoadPage(id__-ENCODINGS); return; + case ENCODINGS...ENCODINGS+6: EventManuallyChangeEncoding(id__-ENCODINGS); return; case NEW_WINDOW: RunProgram(#program_path, NULL); return; case NEW_TAB: if (!http.transfer) EventOpenNewTab(URL_SERVICE_HOMEPAGE); return; case SCAN_CODE_BS: @@ -378,7 +378,7 @@ void draw_window() DrawTabsBar(); } -void EventChangeEncodingAndLoadPage(int _new_encoding) +void EventManuallyChangeEncoding(int _new_encoding) { dword newbuf, newsize; WB1.custom_encoding = _new_encoding; @@ -535,6 +535,7 @@ void OpenPage(dword _open_URL) //CACHED PAGE if (cache.current_type==PAGE) { history.add(#new_url); + WB1.custom_encoding = cache.current_charset; LoadInternalPage(cache.current_buf, cache.current_size); } else { @@ -544,6 +545,7 @@ void OpenPage(dword _open_URL) } else if (!strncmp(#new_url,"WebView:",8)) { //INTERNAL PAGE history.add(#new_url); + WB1.custom_encoding = -1; if (streq(#new_url, URL_SERVICE_HOMEPAGE)) LoadInternalPage(#buildin_page_home, sizeof(buildin_page_home)); else if (streq(#new_url, URL_SERVICE_HELP)) LoadInternalPage(#buildin_page_help, sizeof(buildin_page_help)); else if (streq(#new_url, URL_SERVICE_TEST)) LoadInternalPage(#buildin_page_test, sizeof(buildin_page_test)); @@ -774,7 +776,7 @@ void ProcessMenuClick() void EventSeachWeb() { char new_url[URL_SIZE+1]; - replace_char(#editURL, ' ', '_', URL_SIZE); + replace_char(#editURL, ' ', '+', URL_SIZE); strcpy(#new_url, "https://www.google.com/search?q="); strncat(#new_url, #editURL, URL_SIZE); OpenPage(#new_url); @@ -890,6 +892,10 @@ void CheckContentType() char content_type[64]; if (http.header_field("content-type", #content_type, sizeof(content_type))) // application || image + if (strchr(#content_type, '=')) { + WB1.custom_encoding = get_encoding_type_by_name(EAX+1); + } + if (content_type[0] == 'i') { EventDownloadAndOpenImage(http.cur_url); StopLoading(); diff --git a/programs/cmm/browser/cache.h b/programs/cmm/browser/cache.h index d4d0a53958..13729d1846 100644 --- a/programs/cmm/browser/cache.h +++ b/programs/cmm/browser/cache.h @@ -7,16 +7,18 @@ struct _cache dword current_buf; dword current_size; dword current_type; + dword current_charset; collection url; collection_int data; collection_int size; collection_int type; + collection_int charset; void add(); bool has(); void clear(); } cache=0; -void _cache::add(dword _url, _data, _size, _type) +void _cache::add(dword _url, _data, _size, _type, _charset) { dword data_pointer; data_pointer = malloc(_size); @@ -26,6 +28,7 @@ void _cache::add(dword _url, _data, _size, _type) url.add(_url); size.add(_size); type.add(_type); + charset.add(_charset); current_buf = data_pointer; current_size = _size; @@ -39,6 +42,7 @@ bool _cache::has(dword _link) current_buf = data.get(pos); current_size = size.get(pos); current_type = type.get(pos); + current_charset = charset.get(pos); return true; } return false; diff --git a/programs/cmm/browser/const.h b/programs/cmm/browser/const.h index 1b4dac8ef5..a8ebbf81af 100644 --- a/programs/cmm/browser/const.h +++ b/programs/cmm/browser/const.h @@ -4,9 +4,9 @@ // CYRILLIC // // // //===================================================// -char buildin_page_error[] = FROM "res/page_not_found_ru.htm""\0"; -char buildin_page_home[] = FROM "res/homepage_ru.htm""\0"; -char buildin_page_help[] = FROM "res/help_ru.htm""\0"; +char buildin_page_error[] = FROM "res/page_not_found_ru.htm"; +char buildin_page_home[] = FROM "res/homepage_ru.htm"; +char buildin_page_help[] = FROM "res/help_ru.htm"; char accept_language[]= "Accept-Language: ru\n"; char rmb_menu[] = "Посмотреть исходник|Ctrl+U @@ -41,9 +41,9 @@ char clear_cache_ok[] = "'WebView\n // ENGLISH // // // //===================================================// -char buildin_page_error[] = FROM "res/page_not_found_en.htm""\0"; -char buildin_page_home[] = FROM "res/homepage_en.htm""\0"; -char buildin_page_help[] = FROM "res/help_en.htm""\0"; +char buildin_page_error[] = FROM "res/page_not_found_en.htm"; +char buildin_page_home[] = FROM "res/homepage_en.htm"; +char buildin_page_help[] = FROM "res/help_en.htm"; char accept_language[]= "Accept-Language: en\n"; char rmb_menu[] = "View source|Ctrl+U @@ -79,7 +79,7 @@ char clear_cache_ok[] = "'WebView\nThe cache has been cleared.' -tI"; // // //===================================================// -char buildin_page_test[] = FROM "res/test.htm""\0"; +char buildin_page_test[] = FROM "res/test.htm"; #define URL_SERVICE_HISTORY "WebView:history" #define URL_SERVICE_HOMEPAGE "WebView:home" @@ -107,4 +107,4 @@ char editbox_icons[] = FROM "res/editbox_icons.raw"; #define DEFAULT_URL URL_SERVICE_HOMEPAGE -char version[]="WebView 3.28"; \ No newline at end of file +char version[]="WebView 3.29"; \ No newline at end of file diff --git a/programs/cmm/browser/tabs.h b/programs/cmm/browser/tabs.h index 2ea3b7c784..2de08c43da 100644 --- a/programs/cmm/browser/tabs.h +++ b/programs/cmm/browser/tabs.h @@ -154,6 +154,7 @@ void EventTabClose(int _id) tab.close(_id); } DrawTabsBar(); + DrawStatusBar(NULL); } void EventCloseActiveTab() @@ -172,6 +173,7 @@ void EventTabClick(int _id) WB1.ParseHtml(WB1.bufpointer, WB1.bufsize); WB1.DrawPage(); SetOmniboxText(history.current()); + DrawStatusBar(NULL); } void EventOpenNewTab(dword _url)