aboutsummaryrefslogtreecommitdiffstats
path: root/svm/svmtool.c
blob: e87746110aed0dfb8c53851590ea14660ae95e4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
/* 
 *------------------------------------------------------------------
 * svmtool.c 
 *
 * Copyright (c) 2009 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <signal.h>
#include <pthread.h>
#include <unistd.h>
#include <time.h>
#include <fcntl.h>
#include <string.h>
#include <vppinfra/clib.h>
#include <vppinfra/vec.h>
#include <vppinfra/hash.h>
#include <vppinfra/bitmap.h>
#include <vppinfra/fifo.h>
#include <vppinfra/time.h>
#include <vppinfra/mheap.h>
#include <vppinfra/heap.h>
#include <vppinfra/pool.h>
#include <vppinfra/format.h>

#include "svm.h"



/*
 * format_all_svm_regions
 * Maps / unmaps regions. Do NOT call from client code!
 *
 * format() callback. Consumes one int ("verbose") from args, appends a
 * description of the root region to s, then maps each subregion listed
 * in the root region's subregion pool, appends its description and
 * unmaps it again.
 *
 * Returns the (possibly reallocated) format vector s.
 */
u8 *format_all_svm_regions (u8 *s, va_list * args)
{
    int verbose = va_arg (*args, int);
    svm_region_t *root_rp = svm_get_root_rp();
    svm_main_region_t *mp;
    svm_subregion_t *subp;
    svm_region_t *rp;
    svm_map_region_args_t *a = 0;
    u8 ** svm_names=0;
    u8 *name=0;
    int i;

    ASSERT(root_rp);

    pthread_mutex_lock (&root_rp->mutex);

    s = format (s, "%U", format_svm_region, root_rp, verbose);

    mp = root_rp->data_base;

    /*
     * Snapshot the subregion names: we can't hold the root rp mutex
     * across find_or_create, so work from private copies instead.
     */
    pool_foreach (subp, mp->subregions, ({
        name = vec_dup (subp->subregion_name);
        vec_add1(svm_names, name);
    }));

    pthread_mutex_unlock (&root_rp->mutex);

    for (i = 0; i < vec_len(svm_names); i++) {
        vec_validate(a, 0);
        a->name = (char *) svm_names[i];
        rp = svm_region_find_or_create (a);
        if (rp) {
            pthread_mutex_lock (&rp->mutex);
            s = format (s, "%U", format_svm_region, rp, verbose);
            pthread_mutex_unlock (&rp->mutex);
            svm_region_unmap (rp);
        }
        /*
         * The name copy belongs to us whether or not the region could
         * be mapped; free it unconditionally so a failed lookup does
         * not leak it.
         */
        vec_free (svm_names[i]);
        vec_free (a);
    }
    vec_free(svm_names);
    return (s);
}

/*
 * show
 * Attaches to the svm root region under chroot_path, prints our pid
 * followed by a description of the root region and every subregion
 * (see format_all_svm_regions), then detaches again.
 *
 * chroot_path: passed straight to svm_region_init_chroot()
 * verbose:     forwarded to format_all_svm_regions
 */
void show (char *chroot_path, int verbose)
{
    /*
     * No svm_map_region_args_t is needed here: format_all_svm_regions
     * builds its own per-region argument blocks.
     */
    svm_region_init_chroot(chroot_path);

    fformat(stdout, "My pid is %d\n", getpid());

    fformat(stdout, "%U", format_all_svm_regions, verbose);

    svm_region_exit ();
}


/*
 * svm_map_region_nolock
 * Maps an already-existing shared-memory region described by a.
 *
 * The first page of the shm object is mapped at an arbitrary address to
 * read the region header, then the whole region is remapped MAP_FIXED
 * at the virtual base / size recorded in that header. a->baseva and
 * a->size are overwritten with the values from the header.
 *
 * After mapping, the region mutex is probed with trylock; if it cannot
 * be acquired the recorded owner is reported and the mutex storage is
 * zeroed.
 *
 * Returns the mapped region, or 0 on failure. Every exit path releases
 * the shm name vector, the probe mapping and the file descriptor; the
 * final mapping stays valid after the descriptor is closed.
 */
static void *svm_map_region_nolock (svm_map_region_args_t *a)
{
    int svm_fd;
    svm_region_t *rp;
    int deadman=0;
    u8 *shm_name;

    ASSERT((a->size & ~(MMAP_PAGESIZE-1)) == a->size);

    shm_name = shm_name_from_svm_map_region_args (a);

    svm_fd = shm_open((char *)shm_name, O_RDWR, 0777);

    if (svm_fd < 0) {
        /* Report first: freeing the name could disturb errno */
        perror("svm_region_map(mmap open)");
        vec_free (shm_name);
        return (0);
    }
    vec_free (shm_name);

    /* Map just the header page to discover where the region lives */
    rp = mmap(0, MMAP_PAGESIZE,
              PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);

    if (rp == (svm_region_t *) MAP_FAILED) {
        close(svm_fd);
        clib_warning("mmap");
        return (0);
    }
    /*
     * We lost the footrace to create this region; make sure
     * the winner has crossed the finish line.
     */
    while (rp->version == 0 && deadman++ < 5) {
        sleep(1);
    }

    /*
     * Still no version after the grace period: give up, and undo
     * the probe mapping and the open descriptor.
     */
    if (rp->version == 0) {
        clib_warning("rp->version %d not %d", rp->version,
                     SVM_VERSION);
        munmap(rp, MMAP_PAGESIZE);
        close(svm_fd);
        return (0);
    }
    /* Remap now that the region has been placed */
    a->baseva = rp->virtual_base;
    a->size = rp->virtual_size;
    munmap(rp, MMAP_PAGESIZE);

    rp = (void *) mmap ((void *)a->baseva, a->size,
                        PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_FIXED, svm_fd, 0);
    if ((uword)rp == (uword)MAP_FAILED) {
        /* Report before close() so errno still describes the mmap */
        clib_unix_warning ("mmap");
        close(svm_fd);
        return (0);
    }

    /* The mapping does not need the descriptor any more */
    close(svm_fd);

    if ((uword) rp != rp->virtual_base) {
        clib_warning("mmap botch");
    }

    if (pthread_mutex_trylock(&rp->mutex)) {
        clib_warning ("rp->mutex LOCKED by pid %d, tag %d, cleared...",
                      rp->mutex_owner_pid, rp->mutex_owner_tag);
        /*
         * NOTE(review): zeroing the storage is how this tool force-clears
         * a stuck mutex; confirm that this is a valid unlocked state
         * for the mutex attributes the region was created with.
         */
        memset(&rp->mutex, 0, sizeof (rp->mutex));

    } else {
        clib_warning ("mutex OK...\n");
        pthread_mutex_unlock(&rp->mutex);
    }

    return ((void *) rp);
}

/*
 * rnd_pagesize
 * Rounds size up to the next multiple of MMAP_PAGESIZE and returns it.
 * The mask arithmetic is only correct when MMAP_PAGESIZE is a power
 * of two (presumably 4k).
 */
static unsigned int rnd_pagesize(unsigned int size)
{
    const unsigned int page_mask = MMAP_PAGESIZE - 1;

    /* Bump past the next boundary, then clear the in-page offset bits */
    return ((size + page_mask) & ~page_mask);
}

/* When defined, lock holders stamp their pid and a tag into the region */
#define MUTEX_DEBUG

/*
 * region_lock
 * Acquires rp->mutex (blocking). With MUTEX_DEBUG defined, the caller's
 * pid and the supplied tag are then recorded in the region so that a
 * stuck lock can be attributed to its owner.
 */
always_inline void region_lock(svm_region_t *rp, int tag)
{
    pthread_mutex_lock(&rp->mutex);

#if defined(MUTEX_DEBUG)
    /* Written only while the mutex is held */
    rp->mutex_owner_pid = getpid();
    rp->mutex_owner_tag = tag;
#endif
}

/*
 * region_unlock
 * Releases rp->mutex. With MUTEX_DEBUG defined, the recorded owner
 * pid and tag are reset to 0 first, while the mutex is still held.
 */
always_inline void region_unlock(svm_region_t *rp)
{
#if defined(MUTEX_DEBUG)
    /* Clear the ownership stamp before giving the lock away */
    rp->mutex_owner_pid = 0;
    rp->mutex_owner_tag = 0;
#endif

    pthread_mutex_unlock(&rp->mutex);
}


/*
 * svm_existing_region_map_nolock
 * Maps the region named a->name, but only if that name is present in
 * the root region's name hash.
 *
 * root_arg: the root region (svm_region_t *)
 * a:        mapping arguments; a->size is grown by MMAP_PAGESIZE +
 *           SVM_PVT_MHEAP_SIZE and page-rounded before the lookup
 *
 * The root region lock is taken and the root private heap is pushed
 * for the duration of the lookup / map; both are always released
 * before returning, whether or not the name was found.
 *
 * Returns the mapped region, or 0 if the name is unknown or the
 * mapping failed.
 */
static void *svm_existing_region_map_nolock (void *root_arg, 
                                             svm_map_region_args_t *a)
{
    svm_region_t *root_rp = root_arg;
    svm_main_region_t *mp;
    svm_region_t *rp = 0;
    void *oldheap;
    uword *p;
    
    a->size +=  MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE;
    a->size = rnd_pagesize(a->size);

    region_lock (root_rp, 4);
    oldheap = svm_push_pvt_heap(root_rp);
    mp = root_rp->data_base;
    
    ASSERT(mp);

    p = hash_get_mem (mp->name_hash, a->name);

    if (p) {
        rp = svm_map_region_nolock (a);
    }

    /*
     * Release on every path. Returning early on a miss would leave the
     * root region locked and the private heap pushed.
     */
    region_unlock(root_rp);
    svm_pop_heap (oldheap);

    return rp;
}

/*
 * trace
 * Turns mheap tracing on or off for the data heap of the named region.
 *
 * chroot_path:    passed to svm_region_init_chroot()
 * name:           region name to find (or create, 1 MB, SVM_FLAGS_MHEAP)
 * enable_disable: forwarded to mheap_trace()
 */
static void trace (char *chroot_path, char *name, int enable_disable)
{
    svm_map_region_args_t *a = 0;
    svm_region_t *db_rp;
    void *oldheap;

    vec_validate (a, 0);

    svm_region_init_chroot(chroot_path);

    a->name = name;
    a->size = 1<<20;
    a->flags = SVM_FLAGS_MHEAP;

    db_rp = svm_region_find_or_create (a);

    /*
     * Check explicitly rather than with ASSERT alone, so the failure
     * is still caught if assertions are compiled out instead of
     * dereferencing a null region below.
     */
    if (db_rp == 0) {
        clib_warning ("region '%s' could not be mapped", name);
        svm_region_exit ();
        vec_free (a);
        return;
    }
    
    region_lock (db_rp, 20);
    
    /* mheap_trace is called with the region's data heap pushed */
    oldheap = svm_push_data_heap (db_rp);

    mheap_trace (db_rp->data_heap, enable_disable);
    
    svm_pop_heap (oldheap);
    region_unlock (db_rp);

    svm_region_unmap ((void *)db_rp);
    svm_region_exit ();
    vec_free (a);
}



/*
 * subregion_repair
 * Attaches to the root region under chroot_path, then maps each
 * subregion listed in the root region's pool via
 * svm_existing_region_map_nolock (which probes, and if necessary
 * clears, the subregion mutex) and unmaps it again.
 */
static void subregion_repair(char *chroot_path)
{
    int i;
    svm_main_region_t *mp;
    svm_map_region_args_t a;
    svm_region_t *root_rp;
    svm_region_t *rp;
    svm_subregion_t *subp;
    u8 *name=0;
    u8 ** svm_names=0;

    svm_region_init_chroot(chroot_path);
    root_rp = svm_get_root_rp();

    ASSERT(root_rp);
    
    pthread_mutex_lock (&root_rp->mutex);
    
    mp = root_rp->data_base;
    
    /*
     * Snapshot the subregion names: we can't hold the root rp mutex
     * across the per-region mapping below, which takes it again.
     */
    pool_foreach (subp, mp->subregions, ({
                name = vec_dup (subp->subregion_name);
                vec_add1(svm_names, name);
            }));
    
    pthread_mutex_unlock (&root_rp->mutex);
    
    for (i = 0; i < vec_len(svm_names); i++) {
        memset (&a, 0, sizeof (a));
        a.root_path = chroot_path;
        a.name = (char *) svm_names[i];
        fformat(stdout, "Checking %s region...\n",
                a.name);
        rp = svm_existing_region_map_nolock (root_rp, &a);
        if (rp) {
            svm_region_unmap (rp);
        }
        /*
         * The name copy is ours regardless of whether the region could
         * be mapped; free it unconditionally so failures do not leak.
         */
        vec_free(svm_names[i]);
    }
    vec_free(svm_names);
}

/*
 * repair
 * Maps the global (root) svm region directly from its shm object,
 * probes the root mutex and zeroes it if it is stuck, then runs
 * subregion_repair() over every subregion.
 *
 * chroot_path:       root path used to derive the shm object name
 * crash_root_region: when non-zero, deliberately leave the root mutex
 *                    locked (owner tag 99) on the way out
 *
 * Failures while mapping the root region are reported and the function
 * still proceeds to subregion_repair(), as before; the crash step is
 * skipped if no usable root mapping was obtained.
 */
void repair (char *chroot_path, int crash_root_region)
{
    svm_region_t *root_rp = 0;
    svm_map_region_args_t *a = 0;
    int svm_fd;
    u8 *shm_name;

    fformat(stdout, "our pid: %d\n", getpid());

    vec_validate (a, 0);

    a->root_path = chroot_path;
    a->name = SVM_GLOBAL_REGION_NAME;
    a->baseva = SVM_GLOBAL_REGION_BASEVA;
    a->size = SVM_GLOBAL_REGION_SIZE;
    a->flags = SVM_FLAGS_NODATA;

    shm_name = shm_name_from_svm_map_region_args (a);

    svm_fd = shm_open ((char *)shm_name, O_RDWR, 0777);

    if (svm_fd < 0) {
        /* Report first: freeing the name could disturb errno */
        perror("svm_region_map(mmap open)");
        vec_free(shm_name);
        goto out;
    }

    vec_free(shm_name);

    /* Map just the header page to learn the region's placement */
    root_rp = mmap(0, MMAP_PAGESIZE, 
              PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);
    
    if (root_rp == (svm_region_t *) MAP_FAILED) {
        close(svm_fd);
        clib_warning("mmap");
        root_rp = 0;            /* never hand MAP_FAILED to later code */
        goto out;
    }
    
    /* Remap now that the region has been placed */
    clib_warning ("remap to 0x%llx",
                  (unsigned long long) root_rp->virtual_base);

    a->baseva = root_rp->virtual_base;
    a->size = root_rp->virtual_size;
    munmap(root_rp, MMAP_PAGESIZE);
    
    root_rp = (void *) mmap ((void *)a->baseva, a->size, 
                        PROT_READ | PROT_WRITE, 
                        MAP_SHARED | MAP_FIXED, svm_fd, 0);
    if ((uword)root_rp == (uword)MAP_FAILED) {
        /* Report before close() so errno still describes the mmap */
        clib_unix_warning ("mmap");
        close(svm_fd);
        root_rp = 0;
        goto out;
    }
    
    /* The mapping does not need the descriptor any more */
    close(svm_fd);

    if ((uword) root_rp != root_rp->virtual_base) {
        clib_warning("mmap botch");
        goto out;
    }
    
    if (pthread_mutex_trylock(&root_rp->mutex)) {
        clib_warning ("root_rp->mutex LOCKED by pid %d, tag %d, cleared...",
                      root_rp->mutex_owner_pid, root_rp->mutex_owner_tag);
        /*
         * NOTE(review): zeroing the storage is how this tool force-clears
         * a stuck mutex; confirm that this is a valid unlocked state
         * for the mutex attributes the region was created with.
         */
        memset(&root_rp->mutex, 0, sizeof (root_rp->mutex));
    } else {
        clib_warning ("root_rp->mutex OK...\n");
        pthread_mutex_unlock(&root_rp->mutex);
    }
    
 out:
    vec_free (a);
    /*
     * Root region handling is finished (successfully or not);
     * now fix broken subregions.
     */
    subregion_repair(chroot_path);

    if (crash_root_region) {
        if (root_rp == 0) {
            clib_warning ("root region not mapped, cannot leave it locked");
        } else {
            clib_warning ("Leaving root region locked on purpose...");
            pthread_mutex_lock(&root_rp->mutex);
            root_rp->mutex_owner_pid = getpid();
            root_rp->mutex_owner_tag = 99;
        }
    }
    svm_region_exit ();
}

/*
 * main
 * Parses the command line left to right and executes each command as
 * soon as it is recognized. "chroot <path>" only records the path, so
 * it must appear before the command(s) it should apply to.
 *
 * If no command was recognized, a usage summary is printed. The exit
 * status is always 0.
 */
int main (int argc, char **argv)
{
    unformat_input_t input;
    int parsed =0;
    char *name = 0;
    char *chroot_path = 0;
    u8 *chroot_u8 = 0;

    unformat_init_command_line (&input, argv);

    while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) {
        /*
         * "show-verbose" is tried before "show"; presumably the shorter
         * keyword would otherwise match its prefix.
         */
        if (unformat(&input, "show-verbose")) {
            show (chroot_path, 1);
            parsed++;
        } else if (unformat (&input, "show")) {
            show (chroot_path, 0);
            parsed++;
        } else if (unformat (&input, "client-scan")) {
            svm_client_scan(chroot_path);
            parsed++;
        } else if (unformat (&input, "repair")) {
            repair(chroot_path, 0 /* fix it */);
            parsed++;
        } else if (unformat (&input, "crash")) {
            repair (chroot_path, 1 /* crash it */);
            parsed++;
        } else if (unformat (&input, "trace-on %s", &name)) {
            trace (chroot_path, name, 1);
            parsed++;
        } else if (unformat (&input, "trace-off %s", &name)) {
            trace (chroot_path, name, 0);
            parsed++;
        } else if (unformat (&input, "chroot %s", &chroot_u8)) {
            /* Not counted as a command: it only affects later ones */
            chroot_path = (char *) chroot_u8;
        } else {
            break;
        }
    }

    unformat_free (&input);

    if (!parsed) {
        /* List every command the loop above accepts */
        fformat(stdout, "%s: [chroot <path>] show | show-verbose | client-scan | repair | crash\n", argv[0]);
        fformat(stdout, "      trace-on <region-name> | trace-off <region-name>\n");
    }
    exit (0);
}
2162'>2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420