WebView 3.29: reworked encodings detection

git-svn-id: svn://kolibrios.org@8492 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
Kirill Lipatov (Leency) 2020-12-29 10:31:45 +00:00
parent 7913a353d7
commit f81b4538a5
7 changed files with 160 additions and 108 deletions

View File

@ -46,7 +46,7 @@ struct TWebBrowser {
dword is_html;
collection img_url;
char header[150];
char line[500];
char linebuf[500];
char redirect[URL_SIZE];
void SetStyle();
@ -76,6 +76,7 @@ struct TWebBrowser {
void tag_iframe();
void tag_title();
void tag_font();
void tag_table_reset();
void tag_table();
void tag_td();
void tag_tr();
@ -100,21 +101,13 @@ void TWebBrowser::SetPageDefaults()
bg_colors.add(DEFAULT_BG_COL);
canvas.Fill(0, DEFAULT_BG_COL);
header = NULL;
cur_encoding = CH_CP866;
draw_y = BODY_MARGIN;
draw_x = left_gap = BODY_MARGIN;
draw_w = list.w - BODY_MARGIN;
line = 0;
linebuf = 0;
redirect = '\0';
//hold original buffer
if (o_bufpointer) o_bufpointer=free(o_bufpointer);
o_bufpointer = malloc(bufsize);
memmov(o_bufpointer, bufpointer, bufsize);
if (custom_encoding != -1) {
cur_encoding = custom_encoding;
bufpointer = ChangeCharset(cur_encoding, "CP866", bufpointer);
}
list.SetFont(8, 14, 10011000b);
tag_table_reset();
}
//============================================================================================
void TWebBrowser::ParseHtml(dword _bufpointer, _bufsize){
@ -124,12 +117,25 @@ void TWebBrowser::ParseHtml(dword _bufpointer, _bufsize){
if (list.w!=canvas.bufw) canvas.Init(list.x, list.y, list.w, 400*20);
if (bufpointer != _bufpointer) {
if (bufpointer == _bufpointer) {
custom_encoding = cur_encoding;
} else {
bufpointer = malloc(bufsize);
memmov(bufpointer, _bufpointer, bufsize);
} else {
custom_encoding = CH_CP866;
//hold original buffer
o_bufpointer = malloc(bufsize);
memmov(o_bufpointer, bufpointer, bufsize);
cur_encoding = CH_CP866;
if (custom_encoding != -1) {
cur_encoding = custom_encoding;
bufpointer = ChangeCharset(cur_encoding, "CP866", bufpointer);
bufsize = strlen(bufpointer);
}
}
SetPageDefaults();
is_html = true;
if (!strstri(bufpointer, "<body")) {
@ -153,15 +159,15 @@ void TWebBrowser::ParseHtml(dword _bufpointer, _bufsize){
break;
case 0x09:
if (style.pre) {
tab_len = draw_x - left_gap / list.font_w + strlen(#line) % 4;
tab_len = draw_x - left_gap / list.font_w + strlen(#linebuf) % 4;
if (!tab_len) tab_len = 4; else tab_len = 4 - tab_len;
while (tab_len) {chrcat(#line,' '); tab_len--;}
while (tab_len) {chrcat(#linebuf,' '); tab_len--;}
} else {
goto _DEFAULT;
}
break;
case '&': //&nbsp; and so on
bufpos = GetUnicodeSymbol(#line, sizeof(TWebBrowser.line), bufpos+1, bufpointer+bufsize);
bufpos = GetUnicodeSymbol(#linebuf, sizeof(TWebBrowser.linebuf), bufpos+1, bufpointer+bufsize);
break;
case '<':
if (!is_html) goto _DEFAULT;
@ -176,7 +182,7 @@ void TWebBrowser::ParseHtml(dword _bufpointer, _bufsize){
// So if the encoding was changed from UTF to DOS, then the $bufpos position becomes wrong,
// and we have to start parsing from the very beginning
if (EAX != cur_encoding) && (cur_encoding == CH_UTF8) {
ParseHtml(bufpointer, bufsize);
ParseHtml(bufpointer, strlen(bufpointer));
return;
}
}
@ -194,7 +200,6 @@ void TWebBrowser::ParseHtml(dword _bufpointer, _bufsize){
list.CheckDoesValuesOkey();
anchors.current = NULL;
custom_encoding = -1;
if (!header) {
strncpy(#header, #version, sizeof(TWebBrowser.header)-1);
DrawTitle(#header);
@ -203,15 +208,15 @@ void TWebBrowser::ParseHtml(dword _bufpointer, _bufsize){
//============================================================================================
void TWebBrowser::AddCharToTheLine(unsigned char _char)
{
dword line_len = strlen(#line);
dword line_len = strlen(#linebuf);
if (_char<=15) _char=' ';
if (!style.pre) && (_char == ' ')
{
if (line[line_len-1]==' ') return; //no double spaces
if (draw_x==left_gap) && (!line) return; //no paces at the beginning of the line
if (linebuf[line_len-1]==' ') return; //no double spaces
if (draw_x==left_gap) && (!linebuf) return; //no paces at the beginning of the line
if (link) && (line_len==0) return;
}
if (line_len < sizeof(TWebBrowser.line)) chrcat(#line+line_len, _char);
if (line_len < sizeof(TWebBrowser.linebuf)) chrcat(#linebuf+line_len, _char);
if (line_len+1 * list.font_w + draw_x >= draw_w) RenderTextbuf();
}
//============================================================================================

View File

@ -9,20 +9,20 @@ void TWebBrowser::RenderLine()
if (style.title)
{
strncpy(#header, #line, sizeof(TWebBrowser.header)-1);
strncpy(#header, #linebuf, sizeof(TWebBrowser.header)-1);
strncat(#header, " - ", sizeof(TWebBrowser.header)-1);
strncat(#header, #version, sizeof(TWebBrowser.header)-1);
line = 0;
linebuf = 0;
return;
}
if (t_html) && (!t_body) {
line = 0;
linebuf = 0;
return;
}
if (line)
if (linebuf)
{
pw = strlen(#line) * list.font_w;
pw = strlen(#linebuf) * list.font_w;
zoom = list.font_w / BASIC_CHAR_W;
style.cur_line_h = math.max(style.cur_line_h, list.item_h);
@ -42,19 +42,19 @@ void TWebBrowser::RenderLine()
pc = text_colors.get_last();
if (link) && (pc == text_colors.get(0)) pc = link_color_default;
canvas.WriteText(draw_x, draw_y, list.font_type, pc, #line, NULL);
if (style.b) canvas.WriteText(draw_x+1, draw_y, list.font_type, pc, #line, NULL);
canvas.WriteText(draw_x, draw_y, list.font_type, pc, #linebuf, NULL);
if (style.b) canvas.WriteText(draw_x+1, draw_y, list.font_type, pc, #linebuf, NULL);
if (style.s) canvas.DrawBar(draw_x, list.item_h / 2 - zoom + draw_y, pw, zoom, pc);
if (style.u) canvas.DrawBar(draw_x, list.item_h - zoom - zoom + draw_y, pw, zoom, pc);
if (link) {
if (line[0]==' ') && (line[1]==NULL) {} else {
if (linebuf[0]==' ') && (linebuf[1]==NULL) {} else {
canvas.DrawBar(draw_x, draw_y + list.item_h - calc(zoom*2)-1, pw, zoom, link_color_default);
links.add_text(draw_x, draw_y + list.y, pw, list.item_h - calc(zoom*2)-1, zoom);
}
}
draw_x += pw;
if (debug_mode) debugln(#line);
line = NULL;
if (debug_mode) debugln(#linebuf);
linebuf = NULL;
}
}
@ -65,26 +65,26 @@ void TWebBrowser::RenderTextbuf()
int zoom = list.font_w / BASIC_CHAR_W;
//Do we need a line break?
while (strlen(#line) * list.font_w + draw_x >= draw_w) {
while (strlen(#linebuf) * list.font_w + draw_x >= draw_w) {
//Yes, we do. Lets calculate where...
break_pos = strrchr(#line, ' ');
break_pos = strrchr(#linebuf, ' ');
//Does the new line fit in the current line?
if (break_pos * list.font_w + draw_x > draw_w) {
break_pos = draw_w - draw_x /list.font_w;
while(break_pos) && (line[break_pos]!=' ') break_pos--;
while(break_pos) && (linebuf[break_pos]!=' ') break_pos--;
}
//Maybe a new line is too big for the whole new line? Then we have to split it
if (!break_pos) && (style.tag_list.level*5 + strlen(#line) * zoom >= list.column_max) {
if (!break_pos) && (style.tag_list.level*5 + strlen(#linebuf) * zoom >= list.column_max) {
break_pos = draw_w - draw_x / list.font_w;
}
strcpy(#next_line, #line + break_pos);
line[break_pos] = 0x00;
strcpy(#next_line, #linebuf + break_pos);
linebuf[break_pos] = 0x00;
RenderLine();
strcpy(#line, #next_line);
strcpy(#linebuf, #next_line);
NewLine();
}
RenderLine();

View File

@ -104,11 +104,11 @@ void TWebBrowser::tag_iframe()
{
if (tag.get_value_of("src")) {
NewLine();
strcpy(#line, "IFRAME: ");
strcpy(#linebuf, "IFRAME: ");
RenderTextbuf();
link=true;
links.add_link(tag.value);
strncpy(#line, tag.value, sizeof(TWebBrowser.line)-1);
strncpy(#linebuf, tag.value, sizeof(TWebBrowser.linebuf)-1);
RenderTextbuf();
link=false;
NewLine();
@ -137,17 +137,25 @@ void TWebBrowser::tag_meta_xml()
EDX = strrchr(tag.value, '=') + tag.value; //search in content=
if (ESBYTE[EDX] == '"') EDX++;
strlwr(EDX);
if (streqrp(EDX,"utf-8")) || (streqrp(EDX,"utf8")) ChangeEncoding(CH_UTF8);
else if (streqrp(EDX,"windows-1251")) || (streqrp(EDX,"windows1251")) ChangeEncoding(CH_CP1251);
else if (streqrp(EDX,"dos")) || (streqrp(EDX,"cp-866")) ChangeEncoding(CH_CP866);
else if (streqrp(EDX,"iso-8859-5")) || (streqrp(EDX,"iso8859-5")) ChangeEncoding(CH_ISO8859_5);
else if (streqrp(EDX,"koi8-r")) || (streqrp(EDX,"koi8-u")) ChangeEncoding(CH_KOI8);
EAX = get_encoding_type_by_name(EDX);
if (EAX!=-1) ChangeEncoding(EAX);
}
if (streq(tag.get_value_of("http-equiv"), "refresh")) && (tag.get_value_of("content")) {
if (tag.value = strstri(tag.value, "url")) strcpy(#redirect, tag.value);
}
}
// Maps an encoding name to one of the CH_* encoding constants.
//   name - address of the string holding the encoding name. It is checked with
//          streqrp() against lowercase literals only; tag_meta_xml() lowercases
//          the value with strlwr() before calling this function.
//          NOTE(review): streqrp presumably does a prefix comparison - confirm.
// Returns CH_UTF8, CH_CP1251, CH_CP866, CH_ISO8859_5 or CH_KOI8 on a match,
// or -1 when none of the known names matches.
signed int get_encoding_type_by_name(dword name)
{
	// The name is passed to every check through the EDX register.
	EDX = name;

	if (streqrp(EDX,"utf-8")) || (streqrp(EDX,"utf8")) {
		return CH_UTF8;
	}
	if (streqrp(EDX,"windows-1251")) || (streqrp(EDX,"windows1251")) {
		return CH_CP1251;
	}
	if (streqrp(EDX,"dos")) || (streqrp(EDX,"cp-866")) {
		return CH_CP866;
	}
	if (streqrp(EDX,"iso-8859-5")) || (streqrp(EDX,"iso8859-5")) {
		return CH_ISO8859_5;
	}
	if (streqrp(EDX,"koi8-r")) || (streqrp(EDX,"koi8-u")) {
		return CH_KOI8;
	}

	// Unknown encoding name.
	return -1;
}
void TWebBrowser::tag_code()
{
if (style.pre = tag.opened) {
@ -178,12 +186,12 @@ void TWebBrowser::tag_li()
if (!style.tag_list.level) style.tag_list.upd_level(1, 'u');
if (!style.pre) NewLine();
if (style.tag_list.order_type() == 'u') {
strcpy(#line, "\31 ");
strcpy(#linebuf, "\31 ");
draw_x = style.tag_list.level * 5 - 2 * list.font_w + left_gap;
}
if (style.tag_list.order_type() == 'o') {
sprintf(#line, "%i. ", style.tag_list.inc_counter());
draw_x = style.tag_list.level * 5 - 1 - strlen(#line) * list.font_w + left_gap;
sprintf(#linebuf, "%i. ", style.tag_list.inc_counter());
draw_x = style.tag_list.level * 5 - 1 - strlen(#linebuf) * list.font_w + left_gap;
}
}
}
@ -217,7 +225,7 @@ void TWebBrowser::tag_body()
void TWebBrowser::tag_q()
{
chrncat(#line, '\"', sizeof(TWebBrowser.line));
chrncat(#linebuf, '\"', sizeof(TWebBrowser.linebuf));
}
void TWebBrowser::tag_h1234_caption()
@ -292,14 +300,14 @@ IMGOK:
NOIMG:
if (tag.get_value_of("title")) || (tag.get_value_of("alt")) {
strncpy(#img_path, tag.value, sizeof(TWebBrowser.line)-3);
sprintf(#line, "[%s]", #img_path);
strncpy(#img_path, tag.value, sizeof(TWebBrowser.linebuf)-3);
sprintf(#linebuf, "[%s]", #img_path);
} else {
if (streqrp(#img_path, "data:")) img_path=0;
replace_char(#img_path, '?', NULL, strlen(#img_path));
img_path[sizeof(TWebBrowser.line)-3] = '\0'; //prevent overflow in sprintf
sprintf(#line, "[%s]", #img_path+strrchr(#img_path, '/'));
line[50]= NULL;
img_path[sizeof(TWebBrowser.linebuf)-3] = '\0'; //prevent overflow in sprintf
sprintf(#linebuf, "[%s]", #img_path+strrchr(#img_path, '/'));
linebuf[50]= NULL;
}
text_colors.add(0x9A6F29);
style.image = true;
@ -310,56 +318,83 @@ NOIMG:
unsigned tr_y;
unsigned td_x, td_w;
unsigned highest_td;
int table_c=0;
// Layout state shared by the table tag handlers (tag_table, tag_tr, tag_td);
// a single global instance named `table` is declared together with the struct.
struct TABLE {
// Nesting depth of open <table> tags: incremented when a table opens and
// decremented when it closes (see tag_table()).
int count;
// Index of the current cell column: reset to 0 in tag_tr() and advanced in tag_td().
int col;
// X positions stored per column index; read and updated in tag_td().
collection_int cx;
// row_y      - y coordinate that draw_y is reset to for each cell of the current row.
// next_row_y - largest "draw_y + style.cur_line_h" seen so far; used to place the next row.
int row_y, next_row_y;
} table;
// Returns the global `table` layout state to its initial values: the stored
// column positions are dropped, both row coordinates are zeroed and the
// nesting counter is cleared. Called from SetPageDefaults().
void TWebBrowser::tag_table_reset()
{
	table.cx.drop();
	table.next_row_y = table.row_y = 0;
	table.count = 0;
}
void TWebBrowser::tag_table()
{
if (tag.opened) table_c++; else table_c--;
if (!tag.opened) NewLine();
if (tag.opened) {
if (!table.count) {
table.next_row_y = table.row_y = draw_y;
}
table.count++;
} else {
table.count--;
if (!table.count) {
draw_y = math.max(draw_y + style.cur_line_h, table.next_row_y);
left_gap = BODY_MARGIN;
}
}
}
void TWebBrowser::tag_tr()
{
if (table_c>1) return;
//style.tr = tag.opened;
NewLine();
draw_w = list.w - left_gap;
left_gap = BODY_MARGIN;
td_w = 0;
if (tag.opened) {
tr_y = draw_y;
} else {
//draw_y = highest_td;
if (table.count>1) {
NewLine();
} else {
table.col = 0;
table.row_y = math.max(draw_y + style.cur_line_h, table.next_row_y);
left_gap = BODY_MARGIN;
NewLine();
}
}
}
void TWebBrowser::tag_td()
{
if (table_c>1) return;
if (table.count>1) return;
if (tag.opened) {
NewLine();
//highest_td = math.max(draw_y, tr_y);
if (tag.get_value_of("width")) td_w = atoi(tag.value);
draw_y = tr_y;
draw_x = left_gap;
debugval("td_w", td_w);
if (td_w > 20) draw_w = td_w; else draw_w = list.w - left_gap;
} else {
left_gap += td_w;
draw_w = list.w - left_gap;
/*
draw_w -= left_gap;
if (left_gap < 0) left_gap = BODY_MARGIN;
if (draw_w < 0) {
left_gap = BODY_MARGIN;
draw_w = list.w - left_gap;
table.next_row_y = math.max(draw_y + style.cur_line_h, table.next_row_y);
draw_y = table.row_y;
table.cx.set(table.col, math.max(draw_x,table.cx.get(table.col)) );
draw_x = left_gap = table.cx.get(table.col);
table.col++;
if (tag.get_value_of("width")) {
draw_w = EAX;
//debugval("draw_w", atoi(tag.value));
table.cx.set(table.col, draw_x + atoi(tag.value));
}
debugval("left_gap", left_gap);
*/
//if (tag.get_value_of("height")) table.next_row_y = draw_y + atoi(tag.value);
}
if (left_gap >= list.w - list.font_w - 10) {
notify("left_gap overflow");
draw_x = left_gap = BODY_MARGIN;
table.cx.drop();
table.count = 999;
NewLine();
}
debugval("td_w", td_w);
debugval("left_gap", left_gap);
}
if (draw_w < 0) || (draw_w >= list.w) {
notify("draw_w overflow");
draw_x = left_gap = BODY_MARGIN;
draw_w = list.w - left_gap;
NewLine();
}
}

View File

@ -231,7 +231,7 @@ void main()
if (http_get_type==PAGE) {
history.add(http.cur_url);
if (!strchr(http.cur_url, '?')) {
cache.add(http.cur_url, http.content_pointer, http.content_received, PAGE);
cache.add(http.cur_url, http.content_pointer, http.content_received, PAGE, WB1.custom_encoding);
}
LoadInternalPage(http.content_pointer, http.content_received);
free(http.content_pointer);
@ -239,9 +239,9 @@ void main()
else if (http_get_type==IMG) {
_IMG_RES:
if (http.status_code >= 200) && (http.status_code < 300) {
cache.add(cur_img_url, http.content_pointer, http.content_received, IMG);
cache.add(cur_img_url, http.content_pointer, http.content_received, IMG, NULL);
} else {
cache.add(cur_img_url, 0, 0, IMG);
cache.add(cur_img_url, 0, 0, IMG, NULL);
}
free(http.content_pointer);
GetImg(false);
@ -271,7 +271,7 @@ void ProcessButtonClick(dword id__)
case 1: ExitProcess();
case TAB_CLOSE_ID...TAB_CLOSE_ID+TABS_MAX: EventTabClose(id__ - TAB_CLOSE_ID); return;
case TAB_ID...TAB_ID+TABS_MAX: EventAllTabsClick(id__ - TAB_ID); return;
case ENCODINGS...ENCODINGS+6: EventChangeEncodingAndLoadPage(id__-ENCODINGS); return;
case ENCODINGS...ENCODINGS+6: EventManuallyChangeEncoding(id__-ENCODINGS); return;
case NEW_WINDOW: RunProgram(#program_path, NULL); return;
case NEW_TAB: if (!http.transfer) EventOpenNewTab(URL_SERVICE_HOMEPAGE); return;
case SCAN_CODE_BS:
@ -378,7 +378,7 @@ void draw_window()
DrawTabsBar();
}
void EventChangeEncodingAndLoadPage(int _new_encoding)
void EventManuallyChangeEncoding(int _new_encoding)
{
dword newbuf, newsize;
WB1.custom_encoding = _new_encoding;
@ -535,6 +535,7 @@ void OpenPage(dword _open_URL)
//CACHED PAGE
if (cache.current_type==PAGE) {
history.add(#new_url);
WB1.custom_encoding = cache.current_charset;
LoadInternalPage(cache.current_buf, cache.current_size);
}
else {
@ -544,6 +545,7 @@ void OpenPage(dword _open_URL)
} else if (!strncmp(#new_url,"WebView:",8)) {
//INTERNAL PAGE
history.add(#new_url);
WB1.custom_encoding = -1;
if (streq(#new_url, URL_SERVICE_HOMEPAGE)) LoadInternalPage(#buildin_page_home, sizeof(buildin_page_home));
else if (streq(#new_url, URL_SERVICE_HELP)) LoadInternalPage(#buildin_page_help, sizeof(buildin_page_help));
else if (streq(#new_url, URL_SERVICE_TEST)) LoadInternalPage(#buildin_page_test, sizeof(buildin_page_test));
@ -774,7 +776,7 @@ void ProcessMenuClick()
void EventSeachWeb()
{
char new_url[URL_SIZE+1];
replace_char(#editURL, ' ', '_', URL_SIZE);
replace_char(#editURL, ' ', '+', URL_SIZE);
strcpy(#new_url, "https://www.google.com/search?q=");
strncat(#new_url, #editURL, URL_SIZE);
OpenPage(#new_url);
@ -890,6 +892,10 @@ void CheckContentType()
char content_type[64];
if (http.header_field("content-type", #content_type, sizeof(content_type))) // application || image
if (strchr(#content_type, '=')) {
WB1.custom_encoding = get_encoding_type_by_name(EAX+1);
}
if (content_type[0] == 'i') {
EventDownloadAndOpenImage(http.cur_url);
StopLoading();

View File

@ -7,16 +7,18 @@ struct _cache
dword current_buf;
dword current_size;
dword current_type;
dword current_charset;
collection url;
collection_int data;
collection_int size;
collection_int type;
collection_int charset;
void add();
bool has();
void clear();
} cache=0;
void _cache::add(dword _url, _data, _size, _type)
void _cache::add(dword _url, _data, _size, _type, _charset)
{
dword data_pointer;
data_pointer = malloc(_size);
@ -26,6 +28,7 @@ void _cache::add(dword _url, _data, _size, _type)
url.add(_url);
size.add(_size);
type.add(_type);
charset.add(_charset);
current_buf = data_pointer;
current_size = _size;
@ -39,6 +42,7 @@ bool _cache::has(dword _link)
current_buf = data.get(pos);
current_size = size.get(pos);
current_type = type.get(pos);
current_charset = charset.get(pos);
return true;
}
return false;

View File

@ -4,9 +4,9 @@
// CYRILLIC //
// //
//===================================================//
char buildin_page_error[] = FROM "res/page_not_found_ru.htm""\0";
char buildin_page_home[] = FROM "res/homepage_ru.htm""\0";
char buildin_page_help[] = FROM "res/help_ru.htm""\0";
char buildin_page_error[] = FROM "res/page_not_found_ru.htm";
char buildin_page_home[] = FROM "res/homepage_ru.htm";
char buildin_page_help[] = FROM "res/help_ru.htm";
char accept_language[]= "Accept-Language: ru\n";
char rmb_menu[] =
"<EFBFBD>®á¬®âà¥âì ¨á室­¨ª|Ctrl+U
@ -41,9 +41,9 @@ char clear_cache_ok[] = "'WebView\n
// ENGLISH //
// //
//===================================================//
char buildin_page_error[] = FROM "res/page_not_found_en.htm""\0";
char buildin_page_home[] = FROM "res/homepage_en.htm""\0";
char buildin_page_help[] = FROM "res/help_en.htm""\0";
char buildin_page_error[] = FROM "res/page_not_found_en.htm";
char buildin_page_home[] = FROM "res/homepage_en.htm";
char buildin_page_help[] = FROM "res/help_en.htm";
char accept_language[]= "Accept-Language: en\n";
char rmb_menu[] =
"View source|Ctrl+U
@ -79,7 +79,7 @@ char clear_cache_ok[] = "'WebView\nThe cache has been cleared.' -tI";
// //
//===================================================//
char buildin_page_test[] = FROM "res/test.htm""\0";
char buildin_page_test[] = FROM "res/test.htm";
#define URL_SERVICE_HISTORY "WebView:history"
#define URL_SERVICE_HOMEPAGE "WebView:home"
@ -107,4 +107,4 @@ char editbox_icons[] = FROM "res/editbox_icons.raw";
#define DEFAULT_URL URL_SERVICE_HOMEPAGE
char version[]="WebView 3.28";
char version[]="WebView 3.29";

View File

@ -154,6 +154,7 @@ void EventTabClose(int _id)
tab.close(_id);
}
DrawTabsBar();
DrawStatusBar(NULL);
}
void EventCloseActiveTab()
@ -172,6 +173,7 @@ void EventTabClick(int _id)
WB1.ParseHtml(WB1.bufpointer, WB1.bufsize);
WB1.DrawPage();
SetOmniboxText(history.current());
DrawStatusBar(NULL);
}
void EventOpenNewTab(dword _url)