diff --git a/programs/develop/clink/.gitmodules b/programs/develop/clink/.gitmodules new file mode 100644 index 0000000000..ad30ad70cb --- /dev/null +++ b/programs/develop/clink/.gitmodules @@ -0,0 +1,9 @@ +[submodule "epep"] + path = epep + url = https://github.com/mkostoevr/epep +[submodule "cvec"] + path = cvec + url = https://github.com/mkostoevr/cvec +[submodule "cdict"] + path = cdict + url = https://github.com/mkostoevr/cdict diff --git a/programs/develop/clink/README.md b/programs/develop/clink/README.md new file mode 100644 index 0000000000..1ed31ac79f --- /dev/null +++ b/programs/develop/clink/README.md @@ -0,0 +1,5 @@ +# clink + +A very simple (so far) COFF-to-COFF linker. Merges several COFF files into one. For now it only works with objects that have no external dependencies (it is designed exclusively for linking objects into a KolibriOS COFF library). + +[Development process](https://www.youtube.com/playlist?list=PLVMOfciBdLI6BPovPgfVZ9bNuwbXetRc9) (in Russian) diff --git a/programs/develop/clink/README.txt b/programs/develop/clink/README.txt new file mode 100644 index 0000000000..59f5f104dc --- /dev/null +++ b/programs/develop/clink/README.txt @@ -0,0 +1 @@ +Check out the latest version from https://github.com/mkostoevr/clink \ No newline at end of file diff --git a/programs/develop/clink/a.out.obj b/programs/develop/clink/a.out.obj new file mode 100644 index 0000000000..3e1129230e Binary files /dev/null and b/programs/develop/clink/a.out.obj differ diff --git a/programs/develop/clink/main.c b/programs/develop/clink/main.c new file mode 100644 index 0000000000..0065dcaf2f --- /dev/null +++ b/programs/develop/clink/main.c @@ -0,0 +1,562 @@ +// TODO: Subtract section's RVA from external and static defined symbol's value +// TODO: Subtract section's RVA from relocations + +#include +#include +#include +#include +#include + +#define EPEP_INST +#include "epep/epep.h" + +typedef char *pchar; + +typedef struct { + Epep epep; + char *name; + size_t *section_offsets; +} CoffObject; + +typedef
/* Writes one byte to the stream. Write errors are not reported. */
static void fwrite8(FILE *f, uint8_t b) {
    fputc((int)b, f);
}

/* Writes a 16-bit value to the stream, least significant byte first. */
static void fwrite16(FILE *f, uint16_t w) {
    for (unsigned shift = 0; shift < 16; shift += 8) {
        fputc((int)((w >> shift) & 0xffu), f);
    }
}

/* Writes a 32-bit value to the stream, least significant byte first. */
static void fwrite32(FILE *f, uint32_t d) {
    for (unsigned shift = 0; shift < 32; shift += 8) {
        fputc((int)((d >> shift) & 0xffu), f);
    }
}
get_section_number(char ***section_names_set, char *sec_name) { + for (size_t i = 0; i < cvec_pchar_size(section_names_set); i++) { + char *it = cvec_pchar_at(section_names_set, i); + if (!strcmp(it, sec_name)) { + return i + 1; + } + } + return 0; +} + +static void add_name_to_set(char *sym_name, char ***set) { + for (size_t i = 0; i < cvec_pchar_size(set); i++) { + char *it = cvec_pchar_at(set, i); + if (!strcmp(it, sym_name)) { + return; + } + } + cvec_pchar_push_back(set, sym_name); +} + +static void build(ObjectIr *ir) { + FILE *out = fopen("a.out.obj", "wb"); + char *strtab = cvec_char_new(1024); + size_t size_of_sections = 0; + size_t number_of_relocations = 0; + + printf("Calculating all sections size and relocations count... "); + for (size_t sec_i = 0; sec_i < cvec_pchar_size(&ir->section_names_set); sec_i++) { + char *name = ir->section_names_set[sec_i]; + + SectionInfo si = cdict_CStr_SectionInfo_get_v(&ir->info_per_section, name); + size_of_sections += si.size; + number_of_relocations += si.number_of_relocations; + } + printf("Done: %u & %u\n", size_of_sections, number_of_relocations); + + size_t fisrt_section_offset = 20 + 40 * cvec_pchar_size(&ir->section_names_set); + size_t offset_to_first_relocation = fisrt_section_offset + size_of_sections; + size_t offset_to_next_relocation = offset_to_first_relocation; + size_t next_section_offset = fisrt_section_offset; + + size_t PointerToSymbolTable = fisrt_section_offset + size_of_sections + number_of_relocations * 10; + + // COFF Header + printf("Writing COFF header... 
"); + fwrite16(out, 0x14c); // Machine + fwrite16(out, cvec_pchar_size(&ir->section_names_set)); // NumberOfSections + fwrite32(out, 0); // TimeDataStamp + fwrite32(out, PointerToSymbolTable); // PointerToSymbolTable + fwrite32(out, ir->number_of_symbols); // NumberOfSymbols + fwrite16(out, 0); // SizeOfOptionalHeader + fwrite16(out, 0); // Characteristics + printf("Done.\n"); + + // Section Headers + printf("Writing section headers {\n"); + for (size_t sec_i = 0; sec_i < cvec_pchar_size(&ir->section_names_set); sec_i++) { + char *name = ir->section_names_set[sec_i]; + SectionInfo si = cdict_CStr_SectionInfo_get_v(&ir->info_per_section, name); + + // Name + printf(" Writing %s Section Header... ", name); + if (strlen(name) <= 8) { + for (size_t i = 0; i < 8; i++) { + size_t sl = strlen(name); + fwrite8(out, i < sl ? name[i] : '\0'); + } + } else { + fwrite8(out, '/'); + + size_t strtab_index = strtab_add(&strtab, name); + char numstr[8] = { 0 }; + sprintf(numstr, "%u", strtab_index); + fwrite(numstr, 1, 7, out); + } + fwrite32(out, 0); // VirtualSize + fwrite32(out, 0); // VirtualAddress + fwrite32(out, si.size); // SizeOfRawData + fwrite32(out, next_section_offset); // PointerToRawData + next_section_offset += si.size; + fwrite32(out, offset_to_next_relocation); // PointerToRelocations + offset_to_next_relocation += si.number_of_relocations * 10; + fwrite32(out, 0); // PointerToLinenumbers + fwrite16(out, si.number_of_relocations); // NumberOfRelocations + fwrite16(out, 0); // NumberOfLinenumbers + fwrite32(out, si.characteristics); // Characteristics + printf("Done.\n"); + } + printf("}\n"); + + // Section data + printf("Writing sections {\n"); + for (size_t sec_i = 0; sec_i < cvec_pchar_size(&ir->section_names_set); sec_i++) { + char *name = ir->section_names_set[sec_i]; + SectionInfo si = cdict_CStr_SectionInfo_get_v(&ir->info_per_section, name); + + printf(" Writing %s... 
", name); + for (size_t i = 0; i < cvec_ObjIdSecId_size(&si.source); i++) { + ObjIdSecId id = cvec_ObjIdSecId_at(&si.source, i); + CoffObject *object = &ir->objects[id.obj_id]; + Epep *epep = &object->epep; + + EpepSectionHeader sh = { 0 }; + if (!epep_get_section_header_by_index(epep, &sh, id.sec_id)) { + ERROR_EPEP(epep); + } + char *buf = malloc(sh.SizeOfRawData); + if (!epep_get_section_contents(epep, &sh, buf)) { + ERROR_EPEP(epep); + } + fwrite(buf, 1, sh.SizeOfRawData, out); + } + printf("Done.\n"); + } + printf("}\n"); + + // COFF Relocations + printf("Writing COFF Relocations {\n"); + for (size_t sec_i = 0; sec_i < cvec_pchar_size(&ir->section_names_set); sec_i++) { + char *name = ir->section_names_set[sec_i]; + SectionInfo si = cdict_CStr_SectionInfo_get_v(&ir->info_per_section, name); + + printf(" Writing relocations of %s {\n", name); + for (size_t i = 0; i < cvec_ObjIdSecId_size(&si.source); i++) { + ObjIdSecId id = cvec_ObjIdSecId_at(&si.source, i); + CoffObject *object = &ir->objects[id.obj_id]; + Epep *epep = &object->epep; + + size_t strtab_size = 0; + if (!epep_get_string_table_size(epep, &strtab_size)) { + ERROR_EPEP(epep); + } + + char *obj_strtab = malloc(strtab_size); + if (!epep_get_string_table(epep, obj_strtab)) { + ERROR_EPEP(epep); + } + + EpepSectionHeader sh = { 0 }; + if (!epep_get_section_header_by_index(epep, &sh, id.sec_id)) { + ERROR_EPEP(epep); + } + for (size_t rel_i = 0; rel_i < sh.NumberOfRelocations; rel_i++) { + EpepCoffRelocation rel = { 0 }; + + if (!epep_get_section_relocation_by_index(epep, &sh, &rel, rel_i)) { + ERROR_EPEP(epep); + } + printf(" { %02x, %02x, %02x }", rel.VirtualAddress, rel.SymbolTableIndex, rel.Type); + rel.VirtualAddress += object->section_offsets[sec_i]; + { + size_t index = rel.SymbolTableIndex; + EpepCoffSymbol sym = { 0 }; + + if (!epep_get_symbol_by_index(epep, &sym, index)) { + ERROR_EPEP(epep); + } + + size_t name_max = 1024; + char name[name_max]; + + if (sym.symbol.Zeroes == 0) { + 
strcpy(name, &obj_strtab[sym.symbol.Offset]); + } else { + memcpy(name, sym.symbol.ShortName, 8); + name[8] = '\0'; + } + + if (!strcmp(name, "_EXPORTS")) { + strcpy(name, "EXPORTS"); + } + + if (sym.symbol.StorageClass != 2) { + sprintf(name, "%s@%s", name, object->name); + } + + Symbol old_sym = cdict_CStr_Symbol_get_v(&ir->symtab, name); + + if (old_sym.name == NULL) { + printf("Internal error: Symbol of %s relocation not found", name); + exit(-1); + } + + rel.SymbolTableIndex = old_sym.index; + printf(" -> { %02x, %02x, %02x }: ", rel.VirtualAddress, rel.SymbolTableIndex, rel.Type); + printf("New relocation of %s in %s\n", name, sh.Name); + } + fwrite(&rel, 1, 10, out); + } + } + printf(" }\n"); + } + printf("}\n"); + + // Symbols Table + printf("Writing symbols {\n"); + for (size_t sym_i = 0; sym_i < cvec_pchar_size(&ir->sym_name_set); sym_i++) { + char *name = ir->sym_name_set[sym_i]; + + Symbol sym = cdict_CStr_Symbol_get_v(&ir->symtab, name); + + if (sym.sym.symbol.SectionNumber == 0xffff || + sym.sym.symbol.SectionNumber == 0xfffe || + (sym.sym.symbol.StorageClass != 2 && sym.sym.symbol.StorageClass != 3)) { + fwrite(&sym.sym.symbol, 1, 18, out); + } else { + size_t sec_name_max = 1024; + char sec_name[sec_name_max]; + + size_t object_index = sym.object_index; + CoffObject *object = &ir->objects[object_index]; + Epep *epep = &object->epep; + size_t section_offset = object->section_offsets[sym.sym.symbol.SectionNumber - 1]; + + size_t strtab_size = 0; + if (!epep_get_string_table_size(epep, &strtab_size)) { + ERROR_EPEP(epep); + } + + char *obj_strtab = malloc(strtab_size); + if (!epep_get_string_table(epep, obj_strtab)) { + ERROR_EPEP(epep); + } + + EpepSectionHeader sh = { 0 }; + if (!epep_get_section_header_by_index(epep, &sh, sym.sym.symbol.SectionNumber - 1)) { + ERROR_EPEP(epep); + } + + if (sh.Name[0] == '/') { + strcpy(sec_name, &obj_strtab[atoi(&sh.Name[1])]); + } else { + memcpy(sec_name, sh.Name, 8); + sec_name[8] = '\0'; + } + + printf("%s:\n", 
sym.name); + printf(" Section: %s\n", sec_name); + printf(" StorageClass: %u\n", sym.sym.symbol.StorageClass); + + sym.sym.symbol.SectionNumber = get_section_number(&ir->section_names_set, sec_name); + + if (sym.sym.symbol.SectionNumber == 0) { + printf("Internal error: %s section is not found in output file"); + exit(-1); + } + + sym.sym.symbol.Value += section_offset; + + if (strlen(sym.name) <= 8) { + strcpy(sym.sym.symbol.ShortName, sym.name); + } else { + sym.sym.symbol.Zeroes = 0; + sym.sym.symbol.Offset = strtab_add(&strtab, name); + } + + fwrite(&sym.sym.symbol, 1, 18, out); + } + for (size_t aux_i = 0; aux_i < sym.sym.symbol.NumberOfAuxSymbols; aux_i++) { + fwrite(&sym.auxes[aux_i].symbol, 1, 18, out); + } + } + printf("}\n"); + + // COFF String Table + printf("Writing COFF String Table... "); + fwrite32(out, cvec_pchar_size(&strtab) + 4); + fwrite(strtab, 1, cvec_pchar_size(&strtab), out); + printf("Done.\n"); +} + +static ObjectIr parse_objects(int argc, char **argv) { + CoffObject *objects = cvec_CoffObject_new(128); + char **section_names_set = cvec_pchar_new(4); + char **sym_name_set = cvec_pchar_new(128); + size_t number_of_symbols = 0; + + for (int i = 1; i < argc; i++) { + printf("Primary parsing of %s... 
", argv[i]); + + CoffObject object = { 0 }; + object.name = argv[i]; + object.section_offsets = cvec_size_t_new(128); + + { + FILE *fp = fopen(object.name, "rb"); + if (!fp) { + printf("Error: Can't open \"%s\"", object.name); + exit(-1); + } + + if (!epep_init(&object.epep, fp)) { + ERROR_EPEP(&object.epep); + } + } + + cvec_CoffObject_push_back(&objects, object); + + printf("Done.\n"); + } + + CDict_CStr_Symbol symtab; + + if (!cdict_CStr_Symbol_init(&symtab)) { + ERROR_CDICT(&symtab); + } + + CDict_CStr_SectionInfo info_per_section; + + if (!cdict_CStr_SectionInfo_init(&info_per_section)) { + ERROR_CDICT(&info_per_section); + } + + for (size_t i = 0; i < cvec_CoffObject_size(&objects); i++) { + printf("Secondary parsing of %s {\n", objects[i].name); + + Epep *epep = &(objects[i].epep); + + size_t strtab_size = 0; + if (!epep_get_string_table_size(epep, &strtab_size)) { + ERROR_EPEP(epep); + } + + char *strtab = malloc(strtab_size); + if (!epep_get_string_table(epep, strtab)) { + ERROR_EPEP(epep); + } + + // Fill symbols table + printf(" Symbols {\n"); + for (size_t sym_i = 0; sym_i < epep->coffFileHeader.NumberOfSymbols; sym_i++) { + EpepCoffSymbol sym = { 0 }; + + if (!epep_get_symbol_by_index(epep, &sym, sym_i)) { + ERROR_EPEP(epep); + } + + size_t name_max = 1024; + char name[name_max]; + + if (sym.symbol.Zeroes == 0) { + strcpy(name, &strtab[sym.symbol.Offset]); + } else { + memcpy(name, sym.symbol.ShortName, 8); + name[8] = '\0'; + } + + if (!strcmp(name, "_EXPORTS")) { + strcpy(name, "EXPORTS"); + } + + if (sym.symbol.StorageClass != 2) { + sprintf(name, "%s@%s", name, objects[i].name); + } + + if (sym.symbol.StorageClass != 2 || sym.symbol.SectionNumber) { + if (memcmp(cdict_CStr_Symbol_get_v(&symtab, name).sym.symbol.ShortName, "\0\0\0\0\0\0\0\0", 8)) { + printf("Error: Redefinition of \"%s\"", name); + exit(-1); + } + + EpepCoffSymbol *auxes = cvec_EpepCoffSymbol_new(1); + size_t index = number_of_symbols; + + for (size_t aux_i = 0; aux_i < 
sym.symbol.NumberOfAuxSymbols; aux_i++) { + EpepCoffSymbol aux = { 0 }; + + if (!epep_get_symbol_by_index(epep, &aux, sym_i + aux_i)) { + ERROR_EPEP(epep); + } + cvec_EpepCoffSymbol_push_back(&auxes, aux); + number_of_symbols++; + } + + Symbol new_sym = { sym, auxes, strdup(name), i, index }; + if (!cdict_CStr_Symbol_add_vv(&symtab, strdup(name), new_sym, CDICT_NO_CHECK)) { + ERROR_CDICT(&symtab); + } + number_of_symbols++; + + printf(" Symbol #%u: %s (%u auxes, #%u)\n", sym_i, name, cvec_EpepCoffSymbol_size(&auxes), number_of_symbols - 1); + + add_name_to_set(strdup(name), &sym_name_set); + } + + sym_i += sym.symbol.NumberOfAuxSymbols; + } + printf(" }\n"); + + // Set section offsets and fill unique section name set + printf(" Sections {\n"); + for (size_t sec_i = 0; sec_i < epep->coffFileHeader.NumberOfSections; sec_i++) { + EpepSectionHeader sh = { 0 }; + + if (!epep_get_section_header_by_index(epep, &sh, sec_i)) { + ERROR_EPEP(epep); + } + + size_t name_max = 1024; + char name[name_max]; + + if (sh.Name[0] == '/') { + strcpy(name, &strtab[atoi(&sh.Name[1])]); + } else { + memcpy(name, sh.Name, 8); + name[8] = '\0'; + } + + add_name_to_set(strdup(name), §ion_names_set); + + SectionInfo si = cdict_CStr_SectionInfo_get_v(&info_per_section, name); + if (si.source == NULL) { + si.source = cvec_ObjIdSecId_new(32); + } + + size_t sec_offset = si.size; + cvec_size_t_push_back(&objects[i].section_offsets, sec_offset); + + si.size += sh.SizeOfRawData; + si.characteristics |= sh.Characteristics; + si.number_of_relocations += sh.NumberOfRelocations; + cvec_ObjIdSecId_push_back(&si.source, (ObjIdSecId){ i, sec_i }); + cdict_CStr_SectionInfo_add_vv(&info_per_section, strdup(name), si, CDICT_REPLACE_EXIST); + + printf(" Section #%llu {\n", sec_i); + printf(" Name: %s\n", name); + printf(" Virtual Address: %u\n", sh.VirtualAddress); + printf(" Characteristics: %08x\n", sh.Characteristics); + printf(" Offset in the big section: %u\n", objects[i].section_offsets[sec_i]); + 
printf(" }\n"); + } + printf(" }\n"); + printf("}\n"); + } + + ObjectIr ir; + ir.objects = objects; + ir.section_names_set = section_names_set; + ir.info_per_section = info_per_section; + ir.symtab = symtab; + ir.sym_name_set = sym_name_set; + ir.number_of_symbols = number_of_symbols; + return ir; +} + +int main(int argc, char **argv) { + ObjectIr ir = parse_objects(argc, argv); + build(&ir); +} diff --git a/programs/develop/clink/test.bat b/programs/develop/clink/test.bat new file mode 100644 index 0000000000..068f359f42 --- /dev/null +++ b/programs/develop/clink/test.bat @@ -0,0 +1,3 @@ +tcc -run main.c test\a.obj test\b.obj +tcc -run epep/example.c a.out.obj +pause diff --git a/programs/develop/clink/test/a.c b/programs/develop/clink/test/a.c new file mode 100644 index 0000000000..ec8f63ca02 --- /dev/null +++ b/programs/develop/clink/test/a.c @@ -0,0 +1,3 @@ +int f() { + return 42; +} \ No newline at end of file diff --git a/programs/develop/clink/test/a.obj b/programs/develop/clink/test/a.obj new file mode 100644 index 0000000000..32f634f203 Binary files /dev/null and b/programs/develop/clink/test/a.obj differ diff --git a/programs/develop/clink/test/b.c b/programs/develop/clink/test/b.c new file mode 100644 index 0000000000..c3022909cd --- /dev/null +++ b/programs/develop/clink/test/b.c @@ -0,0 +1,5 @@ +int f(); + +int main() { + return f(); +} \ No newline at end of file diff --git a/programs/develop/clink/test/b.obj b/programs/develop/clink/test/b.obj new file mode 100644 index 0000000000..72e795f09f Binary files /dev/null and b/programs/develop/clink/test/b.obj differ diff --git a/programs/develop/clink/test/rel.c b/programs/develop/clink/test/rel.c new file mode 100644 index 0000000000..8f7fa9a21f --- /dev/null +++ b/programs/develop/clink/test/rel.c @@ -0,0 +1,15 @@ +int a; + +static int b; + +static int g() { + return b; +} + +int f() { + return a + g(); +} + +int main() { + return f(); +} diff --git a/programs/develop/clink/test/rel.obj 
b/programs/develop/clink/test/rel.obj new file mode 100644 index 0000000000..6f57829103 Binary files /dev/null and b/programs/develop/clink/test/rel.obj differ