From d05eee03680b748dfcaa4c3238a6425dd85941f8 Mon Sep 17 00:00:00 2001 From: "Kirill Lipatov (Leency)" Date: Thu, 26 Mar 2020 00:12:32 +0000 Subject: [PATCH] WebView 2.0 beta 1: - rewrite tag parsing procedure to use collections - completely refactor TWB component - fix a couple of issues: broken tabs in
<pre>, mishandling of local page not found, anchors memory leak
 - so finally all id's and names are included as anchors* (*some anchors in
 web still don't work)

git-svn-id: svn://kolibrios.org@7752 a494cfbc-eb01-0410-851d-a64ba20cac60
---
 programs/cmm/TWB/TWB.c           | 385 +++++++++++++++----------------
 programs/cmm/TWB/absolute_url.h  |   2 +-
 programs/cmm/TWB/anchors.h       |  20 +-
 programs/cmm/TWB/colors.h        |   3 +
 programs/cmm/TWB/links.h         |   4 +-
 programs/cmm/TWB/parce_tag.h     | 136 +++++++----
 programs/cmm/TWB/unicode_tags.h  |  86 +++----
 programs/cmm/browser/WebView.c   |  98 ++++----
 programs/cmm/codeview/codeview.c |  14 +-
 programs/cmm/lib/strings.h       |   6 +-
 programs/cmm/liza/liza.c         |   2 +
 programs/cmm/liza/mail_box.c     |   2 +-
 12 files changed, 393 insertions(+), 365 deletions(-)

diff --git a/programs/cmm/TWB/TWB.c b/programs/cmm/TWB/TWB.c
index f3928bef3e..2f35c28934 100644
--- a/programs/cmm/TWB/TWB.c
+++ b/programs/cmm/TWB/TWB.c
@@ -1,10 +1,14 @@
-
-scroll_bar scroll_wv = { 15,200,398,44,0,2,115,15,0,0xeeeeee,0xBBBbbb,0xeeeeee,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1};
+#include "..\TWB\colors.h"
+#include "..\TWB\anchors.h"
+#include "..\TWB\parce_tag.h"
+#include "..\TWB\absolute_url.h"
+char line[500];
+#include "..\TWB\unicode_tags.h"
 
 enum { ALIGN_LEFT, ALIGN_CENTER, ALIGN_RIGHT};
 
 struct _style {
-bool
+	bool
 	b, u, s, h,
 	pre,
 	blq,
@@ -13,18 +17,19 @@ bool
 	button,
 	image,
 	align;
-dword 
-	bg_color;
+	dword bg_color;
 };
 
 struct TWebBrowser {
 	llist list;
 	_style style;
 	DrawBufer DrawBuf;
+	dword draw_y, stolbec;
 	int zoom;
-	bool opened; //is this a "start tag" or "end tag"
+	dword o_bufpointer;
 	void SetPageDefaults();
-	void Prepare();
+	void AddCharToTheLine();
+	void ParseHtml();
 	void SetStyle();
 	void DrawStyle();
 	void DrawPage();
@@ -34,6 +39,9 @@ struct TWebBrowser {
 	void BufEncode();
 } WB1;
 
+dword link_color_inactive;
+dword link_color_active;
+dword page_bg;
 
 bool 	
 	link,
@@ -41,40 +49,19 @@ bool
 	t_html,
 	t_body;
 
+#include "..\TWB\img_cache.h"
+#include "..\TWB\links.h"
+
 dword bufpointer=0;
-dword o_bufpointer=0;
 dword bufsize=0;
 
-dword text_colors[300];
-dword text_color_index;
-dword link_color_inactive;
-dword link_color_active;
-dword page_bg;
-
-int draw_y;
-int stolbec;
-int tab_len;
-
-int body_magrin=5;
+int body_magrin=6;
 int basic_line_h=22;
 
-char URL[10000];
-char header[2048];
-char line[500];
-char tagparam[10000];
-char tag[100];
+char header[150];
 char oldtag[100];
-char attr[1200];
-char val[4096];
-
-#include "..\TWB\absolute_url.h"
-#include "..\TWB\links.h"
-#include "..\TWB\anchors.h"
-#include "..\TWB\colors.h"
-#include "..\TWB\unicode_tags.h"
-#include "..\TWB\img_cache.h"
-#include "..\TWB\parce_tag.h"
 
+scroll_bar scroll_wv = { 15,200,398,44,0,2,115,15,0,0xeeeeee,0xBBBbbb,0xeeeeee,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1};
 
 //============================================================================================
 void TWebBrowser::DrawStyle()
@@ -83,7 +70,7 @@ void TWebBrowser::DrawStyle()
 	
 	if (!header)
 	{
-		strcpy(#header, #line);
+		strncpy(#header, #line, sizeof(header)-1);
 		line = 0;
 		return;
 	}
@@ -138,20 +125,39 @@ void TWebBrowser::SetPageDefaults()
 	style.bg_color = page_bg;
 	DrawBuf.Fill(0, page_bg);
 	PageLinks.Clear();
-	strcpy(#header, #version);
+	anchors.clear();
+	strncpy(#header, #version, sizeof(header)-1);
 	cur_encoding = CH_NULL;
 	draw_y = body_magrin;
 	stolbec = 0;
 	line = 0;
-	zoom = 1;	
+	zoom = 1;
+	if (o_bufpointer) free(o_bufpointer);
+	o_bufpointer = 0;
 }
 //============================================================================================
-void TWebBrowser::Prepare(){
-	word bukva[2];
-	dword j;
-	bool ignor_param;
-	dword bufpos;
+void TWebBrowser::AddCharToTheLine(unsigned char _char)
+{
 	dword line_len;
+	if (_char<=15) _char=' ';
+	line_len = strlen(#line);
+	if (!style.pre) && (_char == ' ')
+	{
+		if (line[line_len-1]==' ') return; //no double spaces
+		if (!stolbec) && (!line) return; //no paces at the beginning of the line
+	}
+	if (line_len < sizeof(line)) chrcat(#line, _char);
+	CheckForLineBreak();
+}
+//============================================================================================
+void TWebBrowser::ParseHtml(){
+	word bukva[2];
+	char unicode_symbol[10];
+	dword unicode_symbol_result;
+	dword j;
+	bool ignor_param=false;
+	int tab_len;
+	dword bufpos;
 	SetPageDefaults();
 	if (strstri(bufpointer, "';') && (j<8); j++)
+			for (j=1, unicode_symbol=0; (ESBYTE[bufpos+j]<>';') && (j<8); j++)
 			{
 				bukva = ESBYTE[bufpos+j];
-				chrcat(#tag, bukva);
+				chrcat(#unicode_symbol, bukva);
 			}
-			if (bukva = GetUnicodeSymbol(#tag)) {
+			if (bukva = GetUnicodeSymbol(#unicode_symbol)) {
 				bufpos += j;
 				CheckForLineBreak();
 			} else {
-				bukva = '&';
-				goto DEFAULT_MARK;
+				AddCharToTheLine('&');
 			}
 			break;
 		case '<':
@@ -205,70 +210,56 @@ void TWebBrowser::Prepare(){
 				bufpos+=2;
 				break;
 			}
-			tag = attr = tagparam = ignor_param = NULL;
+			tag.reset();
+			if (ESBYTE[bufpos] == '/') {
+				tag.opened = false;
+				bufpos++;
+			}
+
+			ignor_param=false;
 			while (ESBYTE[bufpos] !='>') && (bufpos < bufpointer + bufsize) //ïîëó÷àåì òåã è åãî ïàðàìåòðû
 			{
 				bukva = ESBYTE[bufpos];
 				if (bukva == '\9') || (bukva == '\x0a') || (bukva == '\x0d') bukva = ' ';
 				if (!ignor_param) && (bukva <>' ')
 				{
-					if (strlen(#tag)+1", #tag);
-				j = strstri(bufpos, #tagparam);
-				if (j!=-1) {
-					bufpos = j-1;
-				}
-				tag = tagparam = NULL;
+			if (tag.is("script")) || (tag.is("style")) || (tag.is("binary")) || (tag.is("select"))  { 
+				sprintf(#tag.params, "", #tag.name);
+				j = strstri(bufpos, #tag.params);
+				if (j!=-1) bufpos = j-1;
 				break;
 			}
 
-			if (tag[strlen(#tag)-1]=='/') tag[strlen(#tag)-1]=NULL; //for br/
-			if (tagparam) GetNextParam();
+			if (tag.name[strlen(#tag.name)-1]=='/') tag.name[strlen(#tag.name)-1]=NULL; //for br/ !!!!!!!!
+			if (tag.params) tag.parse_params();
 
-			if (tag[0] == '/') 
-			{
-				 opened = 0;
-				 strcpy(#tag, #tag+1);
-			}
-			else opened = 1;
-
-			if (tag) && (!istag("span")) && (!istag("i")) && (!istag("svg")) {
+			if (tag.name) && (!tag.is("i")) && (!tag.is("svg")) {
 				CheckForLineBreak();
 				DrawStyle();
-				if (tag) SetStyle();
+				if (tag.name) SetStyle();
 			}
-			strlcpy(#oldtag, #tag, sizeof(oldtag)-1);
-			tag = attr = tagparam = ignor_param = NULL;
+			strncpy(#oldtag, #tag.name, sizeof(oldtag)-1);
 			break;
 		default:
-			DEFAULT_MARK:
-			if (bukva<=15) bukva=' ';
-			line_len = strlen(#line);
-			if (!style.pre) && (bukva == ' ')
-			{
-				if (line[line_len-1]==' ') break; //no double spaces
-				if (!stolbec) && (!line) break; //no paces at the beginning of the line
-			}
-			if (line_len < sizeof(line)) chrcat(#line, bukva);
-			CheckForLineBreak();
+			AddCharToTheLine(ESBYTE[bufpos]);
 		}
 	}
 	DrawStyle();
 	NewLine();
-	if (list.first == 0) list.count = draw_y;
+	list.count = draw_y;
 	DrawPage();
 }
 //============================================================================================
@@ -297,122 +288,115 @@ bool TWebBrowser::CheckForLineBreak()
 //============================================================================================
 void TWebBrowser::SetStyle() {
 	char img_path[4096]=0;
-	int left1 = body_magrin + list.x;
 	int meta_encoding;
-	if (istag("html")) {
-		t_html = opened;
+
+	dword value;
+
+	if (tag.is("html")) {
+		t_html = tag.opened;
 		return;
 	}
-	if(istag("title")) {
-		if (opened) header=NULL;
+	if(tag.is("title")) {
+		if (tag.opened) header=NULL;
 		return;
 	}
 	
-	IF(istag("q"))
+	IF(tag.is("q"))
 	{
-		if (opened)	{
+		if (tag.opened)	{
 			meta_encoding = strlen(#line);
 			if (line[meta_encoding-1] != ' ') chrcat(#line, ' ');
 			chrcat(#line, '\"');
 		}
-		if (!opened) strcat(#line, "\" ");
+		if (!tag.opened) strcat(#line, "\" ");
 		return;
 	}
-	if (isattr("id=")) || (isattr("name=")) { // TO FIX: works only if the param is the last
-		anchors.add(#val, draw_y);
+	if (value = tag.get_value_of("name=")) || (value = tag.get_value_of("id=")) {
+		anchors.add(value, draw_y);
 	}	
-	if (istag("body")) {
-		t_body = opened;
-		do{
-			if (isattr("link=")) link_color_inactive = GetColor(#val);
-			if (isattr("alink=")) link_color_active = GetColor(#val);
-			if (isattr("text=")) text_colors[0]=GetColor(#val);
-			if (isattr("bgcolor="))
-			{
-				style.bg_color = page_bg = GetColor(#val);
-				DrawBuf.Fill(0, page_bg);
-			}
-		} while(GetNextParam());
-		if (opened) && (cur_encoding==CH_NULL) {
-			cur_encoding = CH_CP866; 
-			//BufEncode(CH_UTF8);
-			debugln("Document has no information about encoding!");
+	if (tag.is("body")) {
+		t_body = tag.opened;
+		if (value = tag.get_value_of("link="))  link_color_inactive = GetColor(value);
+		if (value = tag.get_value_of("alink=")) link_color_active = GetColor(value);
+		if (value = tag.get_value_of("text="))  text_colors[0]=GetColor(value);
+		if (value = tag.get_value_of("bgcolor=")) {
+			style.bg_color = page_bg = GetColor(value);
+			DrawBuf.Fill(0, page_bg);
 		}
-		if (opened) {
-			if (strcmp(#header, #version) != 0) {
+		if (tag.opened) {
+			if (cur_encoding==CH_NULL) {
+				cur_encoding = CH_CP866; 
+				//BufEncode(CH_UTF8);
+				debugln("Document has no information about encoding!");
+			}
+			if (!streq(#header, #version)) {
 				ChangeCharset(charsets[cur_encoding], "CP866", #header);
-				sprintf(#header, "%s - %s", #header, #version);
+				strncat(#header, " - ", sizeof(header)-1);
+				strncat(#header, #version, sizeof(header)-1);
 			}
 			DrawTitle(#header);
 		}
 		return;
 	}
-	if (istag("a")) {
-		if (opened)
+	if (tag.is("a")) {
+		if (tag.opened)
 		{
 			if (link) IF(text_color_index > 0) text_color_index--; //åñëè ïðåäûäóùèé òåã à íå áûë çàêðûò
-			do{
-				if (isattr("href=")) && (!strstr(#val,"javascript:"))
-				{
-					text_color_index++;
-					text_colors[text_color_index] = text_colors[text_color_index-1];
-					link = 1;
-					text_colors[text_color_index] = link_color_inactive;
-					PageLinks.AddLink(#val);
-				}
-			} while(GetNextParam());
-		}
-		else {
+			if (value = tag.get_value_of("href=")) && (!strstr(value,"javascript:"))
+			{
+				text_color_index++;
+				text_colors[text_color_index] = text_colors[text_color_index-1];
+				link = 1;
+				text_colors[text_color_index] = link_color_inactive;
+				PageLinks.AddLink(value);
+			}
+		} else {
 			link = 0;
 			IF(text_color_index > 0) text_color_index--;
 		}
 		return;
 	}
-	if (istag("font")) {
+	if (tag.is("font")) {
 		style.bg_color = page_bg;
-		if (opened)
+		if (tag.opened)
 		{
 			text_color_index++;
 			text_colors[text_color_index] = text_colors[text_color_index-1];
-			do{
-				if (isattr("color=")) text_colors[text_color_index] = GetColor(#val);
-				if (isattr("bg=")) style.bg_color = GetColor(#val);
-			} while(GetNextParam());
+			if (value = tag.get_value_of("color=")) text_colors[text_color_index] = GetColor(value);
+			if (value = tag.get_value_of("bg=")) style.bg_color = GetColor(value);
 		}
 		else if (text_color_index > 0) text_color_index--;
 		return;
 	}
-	if (istag("div")) {
-		if (streq(#oldtag,"div")) && (opened) return;
+	if (tag.is("div")) {
+		if (streq(#oldtag,"div")) && (tag.opened) return;
 		NewLine();
 		//IF (oldtag[0] != 'h') 
 		return;
 	}
-	if (istag("header")) || (istag("article")) || (istag("footer")) || (istag("figure")) {
+	if (tag.is("header")) || (tag.is("article")) || (tag.is("footer")) || (tag.is("figure")) {
 		NewLine();
 		return;
 	}
-	if (istag("p")) {
+	if (tag.is("p")) {
 		IF (oldtag[0] == 'h') || (streq(#oldtag,"td")) || (streq(#oldtag,"p")) return;
 		NewLine();
-		//IF(opened) NewLine();
+		//IF(tag.opened) NewLine();
 		return;
 	}
-	if (istag("br")) { NewLine(); return; }
-	if (istag("tr")) { if (opened) NewLine(); return; }
-	if (istag("b")) || (istag("strong")) || (istag("big")) { style.b = opened; return; }
-	if (istag("button")) { style.button = opened; stolbec++; return; }
-	if (istag("u")) || (istag("ins")) { style.u=opened; return;}
-	if (istag("s")) || (istag("strike")) || (istag("del")) { style.s=opened; return; }
-	if (istag("dd")) { stolbec += 5; return; }
-	if (istag("blockquote")) { style.blq = opened; return; }
-	if (istag("pre")) || (istag("code")) { style.pre = opened; return; }
-	if (istag("img")) {
-		do{
-			if (isattr("src=")) strlcpy(#img_path, #val, sizeof(img_path)-1);
-			if (isattr("title=")) && (strlen(#val)0) && (__isWhite(params[i])) i--;
 
-	if (debug_mode) {
-		debug("tag: "); debugln(#tag);
-		debug("tagparam: "); debugln(#tagparam);
-	}
-	
-	i = strlen(#tagparam) - 1;
-
-	if (tagparam[i] == '/') i--;
-
-	while (i>0) && (__isWhite(tagparam[i])) i--;
-
-	if (tagparam[i] == '"') || (tagparam[i] == '\'')
+	if (params[i] == '"') || (params[i] == '\'')
 	{
-		//find VAL end
-		quotes = tagparam[i];
-		tagparam[i] = '\0'; i--;
+		//remove quotes
+		quotes = params[i];
+		params[i] = EOS;
+		i--;
 
 		//find VAL start and copy
-		i = strrchr(#tagparam, quotes);
-		strlcpy(#val, #tagparam + i, sizeof(val)-1);
-		tagparam[i] = '\0'; i--;
+		i = strrchr(#params, quotes);
+		strlcpy(#val, #params + i, sizeof(val)-1);
+		params[i] = EOS; 
+		i--;
 
 		//find ATTR end
-		while (i > 0) && (tagparam[i] != '=') i--;
-		tagparam[i+1] = '\0';
+		while (i > 0) && (params[i] != '=') i--;
+		params[i+1] = EOS;
 	}
 	else
 	{
-		//find VAL end
-		//already have
-
 		//find VAL start and copy
-		while (i > 0) && (tagparam[i] != '=') i--;
+		while (i > 0) && (params[i] != '=') i--;
 		i++;
-		strlcpy(#val, #tagparam + i, sizeof(val)-1);
-		// tagparam[i] = '\0';
+		strlcpy(#val, #params + i, sizeof(val)-1);
 
-		//find ATTR end
-		//already have
+		//already have ATTR end
 	}
 
 	//find ATTR start and copy
-	while (i>0) && (!__isWhite(tagparam[i])) i--;
-	strlcpy(#attr, #tagparam + i + 1, sizeof(attr)-1);
-	tagparam[i] = '\0';
+	while (i>0) && (!__isWhite(params[i])) i--;
+	strlcpy(#attr, #params + i + 1, sizeof(attr)-1);
+	strlwr(#attr);
+	params[i] = '\0';
  
 	//fix case: src=./images/KolibriOS_logo2.jpg?sid=e8ece8b38b
 	i = strchr(#attr,'=');
@@ -58,15 +109,18 @@ bool GetNextParam()
 		ESBYTE[i+1] = '\0';
 	}
 
-	strlwr(#attr);
-
-	if (debug_mode) {
-		debug("val: "); debugln(#val);
-		debug("attr: "); debugln(#attr);
-		debugln(" ");
-	}
+	attributes.add(#attr);
+	values.add(#val);
 
 	return true;
 }
 
-
+dword _tag::get_value_of(dword _attr_name)
+{
+	int pos = attributes.get_pos_by_name(_attr_name);
+	if (pos == -1) {
+		return 0;
+	} else {
+		return values.get(pos);
+	}
+}
diff --git a/programs/cmm/TWB/unicode_tags.h b/programs/cmm/TWB/unicode_tags.h
index 71d5296b5c..d791574ce8 100644
--- a/programs/cmm/TWB/unicode_tags.h
+++ b/programs/cmm/TWB/unicode_tags.h
@@ -1,67 +1,53 @@
-char *unicode_tags[]={
-"nbsp",  " ",
-"#38",   " ",
-"#160",  " ",
-
-"copy",  "(c)",
-"#169",  "(c)",
+char *unicode_symbols[]={
+"#32", " ",      
+"#34", "\"",     "quot","\"",
+"#38", "&",      "amp", "&",
+"#39", "'",
+"#039","'",
+"#60", "<",      "lt",  "<",
+"#62", ">",      "gt",  ">",
+"#91", "[",
+"#93", "]",
+"#96", "'",
+"#149","-",
+"#151","-",
+"#160"," ",     "nbsp", " ",
+"#169","(c)",   "copy", "(c)",
+"#171","<<",    "laquo","<<",
+"#174","(r)",   "reg",  "(r)",
+"#187",">>",    "raquo",">>",
 
 "trade", "[TM]",
-
-"reg",   "(r)",
-"#174",  "(r)",
-
 "bdquo", ",,",
 
-"amp",   "&",
-"#38",   "&",
-
-"lt",    "<",
-"#60",   "<",
-
-"gt",    ">",
-"#62",   ">",
-
 "minus", "-",
 "ndash", "-",
 "mdash", "-", //--
-"#8722", "-",
-"#8211", "-",
-"#151",  "-",
-"#149",  "-",
-"#9642", "-", //square in the middle of the line
 
 "rsquo", "'",
-"#39",   "'",
-"#039",  "'",
-"#96",   "'",
+
+"#1028", "\242",
+"#1030", "I",
+"#1031", "\244",
+
+"#8211", "-",
 "#8217", "'",
-
-"quot",  "\"",
-"#34",   "\"",
-"ldquo", "\"",
-"rdquo", "\"",
-"#8222", "\"",
-"#8221", "\"",
-
-"laquo", "<<",
-"#171",  "<<",
-"raquo", ">>",
-"#187",  ">>",
+"#8222", "\"", "ldquo", "\"",
+"#8221", "\"", "rdquo", "\"",
+"#8470", "N",
+"#8722", "-",
+"#9642", "-", //square in the middle of the line
 
 "uarr",  "\24",
 "darr",  "\25",
 "rarr",  "\26",
 "larr",  "\27", 
 
-"#1028", "\242",
-"#1030", "I",
-"#1031", "\244",
-
-"#8470", "N",
-"bull",  "\31", //  
+"bull",  "\31",
 "percnt","%",
 
+"#xfeff", "",
+
 0}; 
 
 
@@ -71,18 +57,18 @@ bool GetUnicodeSymbol(dword in_tag)
 {
 	int j, specia1040;
 	
-	for (j=0; unicode_tags[j]!=0; j+=2;) 
+	for (j=0; unicode_symbols[j]!=0; j+=2;) 
 	{
-		if (!strcmp(in_tag, unicode_tags[j]))
+		if (!strcmp(in_tag, unicode_symbols[j]))
 		{
-			strcat(#line, unicode_tags[j+1]);
+			strcat(#line, unicode_symbols[j+1]);
 			return true;
 		}
 	}
 
 	specia1040 = atoi(in_tag + 1) - 1040;
 	
-	if (tag[1] == '1') && (specia1040>=0) 
+	if (ESBYTE[in_tag+1] == '1') && (specia1040>=0) 
 	&& (specia1040<=72) && (strlen(in_tag) == 5)
 	{
 		if (strlen(#line)