/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lnet/klnds/o2iblnd/o2iblnd.h
 *
 * Author: Eric Barton <eric@bartonsoftware.com>
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/unistd.h>
#include <linux/uio.h>

#include <asm/uaccess.h>
#include <asm/io.h>

#include <linux/fs.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/kmod.h>
#include <linux/sysctl.h>
#include <linux/pci.h>

#include <net/sock.h>
#include <linux/in.h>

#define DEBUG_SUBSYSTEM S_LND

#include "../../../include/linux/libcfs/libcfs.h"
#include "../../../include/linux/lnet/lnet.h"
#include "../../../include/linux/lnet/lib-lnet.h"
#include "../../../include/linux/lnet/lnet-sysctl.h"

#include <rdma/rdma_cm.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_fmr_pool.h>

#define IBLND_PEER_HASH_SIZE		101	/* # peer lists */
/* # scheduler loops before reschedule */
#define IBLND_RESCHED			100

#define IBLND_N_SCHED			2
#define IBLND_N_SCHED_HIGH		4

typedef struct {
	int	      *kib_dev_failover;     /* HCA failover */
	unsigned int     *kib_service;	  /* IB service number */
	int	      *kib_min_reconnect_interval; /* first failed connection retry... */
	int	      *kib_max_reconnect_interval; /* ...exponentially increasing to this */
	int	      *kib_cksum;	    /* checksum kib_msg_t? */
	int	      *kib_timeout;	  /* comms timeout (seconds) */
	int	      *kib_keepalive;	/* keepalive timeout (seconds) */
	int	      *kib_ntx;	      /* # tx descs */
	int	      *kib_credits;	  /* # concurrent sends */
	int	      *kib_peertxcredits;    /* # concurrent sends to 1 peer */
	int	      *kib_peerrtrcredits;   /* # per-peer router buffer credits */
	int	      *kib_peercredits_hiw;  /* # when to eagerly return credits */
	int	      *kib_peertimeout;      /* seconds to consider peer dead */
	char	    **kib_default_ipif;     /* default IPoIB interface */
	int	      *kib_retry_count;
	int	      *kib_rnr_retry_count;
	int	      *kib_concurrent_sends; /* send work queue sizing */
	int		 *kib_ib_mtu;		/* IB MTU */
	int	      *kib_map_on_demand;    /* map-on-demand if RD has more fragments
						 * than this value; 0 disables map-on-demand */
	int	      *kib_pmr_pool_size;    /* # physical MRs in pool */
	int	      *kib_fmr_pool_size;    /* # FMRs in pool */
	int	      *kib_fmr_flush_trigger; /* When to trigger FMR flush */
	int	      *kib_fmr_cache;	/* enable FMR pool cache? */
	int	      *kib_require_priv_port;/* accept only privileged ports */
	int	      *kib_use_priv_port;    /* use privileged port for active connect */
	/* # threads on each CPT */
	int		 *kib_nscheds;
} kib_tunables_t;

extern kib_tunables_t  kiblnd_tunables;

#define IBLND_MSG_QUEUE_SIZE_V1      8	  /* V1 only: # messages/RDMAs in-flight */
#define IBLND_CREDIT_HIGHWATER_V1    7	  /* V1 only: when to eagerly return credits */

#define IBLND_CREDITS_DEFAULT	8	  /* default # of peer credits */
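/* Casting -1 to the (unsigned) type of kib_msg_t::ibm_credits yields the
 * largest value that field can carry, e.g. 255 for a __u8 ibm_credits. */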
#define IBLND_CREDITS_MAX	  ((typeof(((kib_msg_t*) 0)->ibm_credits)) - 1)  /* Max # of peer credits */

#define IBLND_MSG_QUEUE_SIZE(v)    ((v) == IBLND_MSG_VERSION_1 ? \
				     IBLND_MSG_QUEUE_SIZE_V1 :   \
				     *kiblnd_tunables.kib_peertxcredits) /* # messages/RDMAs in-flight */
#define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? \
				     IBLND_CREDIT_HIGHWATER_V1 : \
				     *kiblnd_tunables.kib_peercredits_hiw) /* when to eagerly return credits */
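/* Illustrative example: assuming *kib_peertxcredits == 8 on a version-2
 * connection, IBLND_MSG_QUEUE_SIZE(v) evaluates to 8 and credits are
 * eagerly returned once *kib_peercredits_hiw is reached; a version-1
 * connection always uses the fixed values 8 and 7 defined above. */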

#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt)

static inline int
kiblnd_concurrent_sends_v1(void)
{
	if (*kiblnd_tunables.kib_concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
		return IBLND_MSG_QUEUE_SIZE_V1 * 2;

	if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
		return IBLND_MSG_QUEUE_SIZE_V1 / 2;

	return *kiblnd_tunables.kib_concurrent_sends;
}

#define IBLND_CONCURRENT_SENDS(v)  ((v) == IBLND_MSG_VERSION_1 ? \
				     kiblnd_concurrent_sends_v1() : \
				     *kiblnd_tunables.kib_concurrent_sends)
/* 2 OOB messages suffice: 1 for a keepalive and 1 for returning credits */
#define IBLND_OOB_CAPABLE(v)       ((v) != IBLND_MSG_VERSION_1)
#define IBLND_OOB_MSGS(v)	   (IBLND_OOB_CAPABLE(v) ? 2 : 0)

#define IBLND_MSG_SIZE	      (4<<10)		 /* max size of queued messages (inc hdr) */
#define IBLND_MAX_RDMA_FRAGS	 LNET_MAX_IOV	   /* max # of fragments supported */
#define IBLND_CFG_RDMA_FRAGS       (*kiblnd_tunables.kib_map_on_demand != 0 ? \
				    *kiblnd_tunables.kib_map_on_demand :      \
				     IBLND_MAX_RDMA_FRAGS)  /* max # of fragments configured by user */
#define IBLND_RDMA_FRAGS(v)	((v) == IBLND_MSG_VERSION_1 ? \
				     IBLND_MAX_RDMA_FRAGS : IBLND_CFG_RDMA_FRAGS)

/************************/
/* derived constants... */
/* Pools (shared by connections on each CPT) */
/* These pools can grow at runtime, so there is no need to give them a very large initial value */
#define IBLND_TX_POOL			256
#define IBLND_PMR_POOL			256
#define IBLND_FMR_POOL			256
#define IBLND_FMR_POOL_FLUSH		192

/* TX messages (shared by all connections) */
#define IBLND_TX_MSGS()	    (*kiblnd_tunables.kib_ntx)

/* RX messages (per connection) */
#define IBLND_RX_MSGS(v)	    (IBLND_MSG_QUEUE_SIZE(v) * 2 + IBLND_OOB_MSGS(v))
#define IBLND_RX_MSG_BYTES(v)       (IBLND_RX_MSGS(v) * IBLND_MSG_SIZE)
#define IBLND_RX_MSG_PAGES(v)      ((IBLND_RX_MSG_BYTES(v) + PAGE_SIZE - 1) / PAGE_SIZE)

/* WRs and CQEs (per connection) */
#define IBLND_RECV_WRS(v)	    IBLND_RX_MSGS(v)
#define IBLND_SEND_WRS(v)	  ((IBLND_RDMA_FRAGS(v) + 1) * IBLND_CONCURRENT_SENDS(v))
#define IBLND_CQ_ENTRIES(v)	 (IBLND_RECV_WRS(v) + IBLND_SEND_WRS(v))
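
/* Worked example (illustrative): on a version-2 connection with
 * IBLND_MSG_QUEUE_SIZE(v) == 8, IBLND_RX_MSGS(v) = 8 * 2 + 2 = 18
 * receive buffers; IBLND_RX_MSG_BYTES(v) = 18 * 4KiB = 72KiB, i.e.
 * IBLND_RX_MSG_PAGES(v) == 18 on a system with 4KiB pages. */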

struct kib_hca_dev;

/* o2iblnd can run over an aliased interface */
#ifdef IFALIASZ
#define KIB_IFNAME_SIZE	      IFALIASZ
#else
#define KIB_IFNAME_SIZE	      256
#endif

typedef struct {
	struct list_head	   ibd_list;	  /* chain on kib_devs */
	struct list_head	   ibd_fail_list;     /* chain on kib_failed_devs */
	__u32		ibd_ifip;	  /* IPoIB interface IP */
	/** IPoIB interface name */
	char		 ibd_ifname[KIB_IFNAME_SIZE];
	int		  ibd_nnets;	 /* # nets extant */

	unsigned long	   ibd_next_failover;
	int		  ibd_failed_failover; /* # failover failures */
	unsigned int	 ibd_failover;      /* failover in progress */
	unsigned int	 ibd_can_failover;  /* IPoIB interface is a bonding master */
	struct list_head	   ibd_nets;
	struct kib_hca_dev  *ibd_hdev;
} kib_dev_t;

typedef struct kib_hca_dev {
	struct rdma_cm_id   *ibh_cmid;	  /* listener cmid */
	struct ib_device    *ibh_ibdev;	 /* IB device */
	int		  ibh_page_shift;    /* page shift of current HCA */
	int		  ibh_page_size;     /* page size of current HCA */
	__u64		ibh_page_mask;     /* page mask of current HCA */
	int		  ibh_mr_shift;      /* bits shift of max MR size */
	__u64		ibh_mr_size;       /* size of MR */
	int		  ibh_nmrs;	  /* # of global MRs */
	struct ib_mr       **ibh_mrs;	   /* global MR */
	struct ib_pd	*ibh_pd;	    /* PD */
	kib_dev_t	   *ibh_dev;	   /* owner */
	atomic_t	 ibh_ref;	   /* refcount */
} kib_hca_dev_t;

/** # of seconds to keep pool alive */
#define IBLND_POOL_DEADLINE     300
/** # of seconds to retry if allocation failed */
#define IBLND_POOL_RETRY	1

typedef struct {
	int		     ibp_npages;	     /* # pages */
	struct page	    *ibp_pages[0];	   /* page array */
} kib_pages_t;
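
/* ibp_pages[0] is the old zero-length-array form of a flexible array
 * member: each kib_pages_t is allocated with room for ibp_npages page
 * pointers appended to the struct (see kiblnd_alloc_pages()). */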

struct kib_pmr_pool;

typedef struct {
	struct list_head	      pmr_list;	       /* chain node */
	struct ib_phys_buf     *pmr_ipb;		/* physical buffer */
	struct ib_mr	   *pmr_mr;		 /* IB MR */
	struct kib_pmr_pool    *pmr_pool;	       /* owner of this MR */
	__u64		   pmr_iova;	       /* Virtual I/O address */
	int		     pmr_refcount;	   /* reference count */
} kib_phys_mr_t;

struct kib_pool;
struct kib_poolset;

typedef int  (*kib_ps_pool_create_t)(struct kib_poolset *ps,
				     int inc, struct kib_pool **pp_po);
typedef void (*kib_ps_pool_destroy_t)(struct kib_pool *po);
typedef void (*kib_ps_node_init_t)(struct kib_pool *po, struct list_head *node);
typedef void (*kib_ps_node_fini_t)(struct kib_pool *po, struct list_head *node);

struct kib_net;

#define IBLND_POOL_NAME_LEN     32

typedef struct kib_poolset {
	spinlock_t		ps_lock;		/* serialize */
	struct kib_net	 *ps_net;		 /* network it belongs to */
	char		    ps_name[IBLND_POOL_NAME_LEN]; /* pool set name */
	struct list_head	      ps_pool_list;	   /* list of pools */
	struct list_head	      ps_failed_pool_list;    /* failed pool list */
	unsigned long	      ps_next_retry;	  /* time stamp for retry if failed to allocate */
	int		     ps_increasing;	  /* is allocating new pool */
	int		     ps_pool_size;	   /* new pool size */
	int			ps_cpt;			/* CPT id */

	kib_ps_pool_create_t    ps_pool_create;	 /* create a new pool */
	kib_ps_pool_destroy_t   ps_pool_destroy;	/* destroy a pool */
	kib_ps_node_init_t      ps_node_init;	   /* initialize a newly allocated node */
	kib_ps_node_fini_t      ps_node_fini;	   /* finalize node */
} kib_poolset_t;

typedef struct kib_pool {
	struct list_head	      po_list;		/* chain on pool list */
	struct list_head	      po_free_list;	   /* pre-allocated node */
	kib_poolset_t	  *po_owner;	       /* pool_set of this pool */
	unsigned long	      po_deadline;	    /* deadline of this pool */
	int		     po_allocated;	   /* # of elements in use */
	int		     po_failed;	      /* pool is created on failed HCA */
	int		     po_size;		/* # of pre-allocated elements */
} kib_pool_t;

typedef struct {
	kib_poolset_t	   tps_poolset;	    /* pool-set */
	__u64		   tps_next_tx_cookie;     /* cookie of TX */
} kib_tx_poolset_t;

typedef struct {
	kib_pool_t	      tpo_pool;	       /* pool */
	struct kib_hca_dev     *tpo_hdev;	       /* device for this pool */
	struct kib_tx	  *tpo_tx_descs;	   /* all the tx descriptors */
	kib_pages_t	    *tpo_tx_pages;	   /* premapped tx msg pages */
} kib_tx_pool_t;

typedef struct {
	kib_poolset_t	   pps_poolset;	    /* pool-set */
} kib_pmr_poolset_t;

typedef struct kib_pmr_pool {
	struct kib_hca_dev     *ppo_hdev;	       /* device for this pool */
	kib_pool_t	      ppo_pool;	       /* pool */
} kib_pmr_pool_t;

typedef struct {
	spinlock_t		fps_lock;		/* serialize */
	struct kib_net	 *fps_net;		/* IB network */
	struct list_head	      fps_pool_list;	  /* FMR pool list */
	struct list_head	      fps_failed_pool_list;   /* FMR pool list */
	__u64		   fps_version;	    /* validity stamp */
	int			fps_cpt;		/* CPT id */
	int			fps_pool_size;
	int			fps_flush_trigger;
	/* is allocating new pool */
	int			fps_increasing;
	/* time stamp for retry if failed to allocate */
	unsigned long		fps_next_retry;
} kib_fmr_poolset_t;

typedef struct {
	struct list_head	      fpo_list;	       /* chain on pool list */
	struct kib_hca_dev     *fpo_hdev;	       /* device for this pool */
	kib_fmr_poolset_t      *fpo_owner;	      /* owner of this pool */
	struct ib_fmr_pool     *fpo_fmr_pool;	   /* IB FMR pool */
	unsigned long	      fpo_deadline;	   /* deadline of this pool */
	int		     fpo_failed;	     /* FMR pool has failed */
	int		     fpo_map_count;	  /* # of mapped FMR */
} kib_fmr_pool_t;

typedef struct {
	struct ib_pool_fmr     *fmr_pfmr;	       /* IB pool fmr */
	kib_fmr_pool_t	 *fmr_pool;	       /* pool of FMR */
} kib_fmr_t;

typedef struct kib_net {
	struct list_head	   ibn_list;	  /* chain on kib_dev_t::ibd_nets */
	__u64		ibn_incarnation;   /* my epoch */
	int		  ibn_init;	  /* initialisation state */
	int		  ibn_shutdown;      /* shutting down? */

	atomic_t		ibn_npeers;	/* # peers extant */
	atomic_t		ibn_nconns;	/* # connections extant */

	kib_tx_poolset_t	**ibn_tx_ps;	/* tx pool-set */
	kib_fmr_poolset_t	**ibn_fmr_ps;	/* fmr pool-set */
	kib_pmr_poolset_t	**ibn_pmr_ps;	/* pmr pool-set */

	kib_dev_t		*ibn_dev;	/* underlying IB device */
} kib_net_t;

#define KIB_THREAD_SHIFT		16
#define KIB_THREAD_ID(cpt, tid)		((cpt) << KIB_THREAD_SHIFT | (tid))
#define KIB_THREAD_CPT(id)		((id) >> KIB_THREAD_SHIFT)
#define KIB_THREAD_TID(id)		((id) & ((1UL << KIB_THREAD_SHIFT) - 1))
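
/* e.g. KIB_THREAD_ID(2, 5) == (2 << 16) | 5 == 0x20005, from which
 * KIB_THREAD_CPT() recovers 2 and KIB_THREAD_TID() recovers 5. */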

struct kib_sched_info {
	/* serialise */
	spinlock_t		ibs_lock;
	/* schedulers sleep here */
	wait_queue_head_t		ibs_waitq;
	/* conns to check for rx completions */
	struct list_head		ibs_conns;
	/* number of scheduler threads */
	int			ibs_nthreads;
	/* max allowed scheduler threads */
	int			ibs_nthreads_max;
	int			ibs_cpt;	/* CPT id */
};

typedef struct {
	int			kib_init;	/* initialisation state */
	int			kib_shutdown;	/* shut down? */
	struct list_head		kib_devs;	/* IB devices extant */
	/* list head of failed devices */
	struct list_head		kib_failed_devs;
	/* failover thread sleeps here */
	wait_queue_head_t		kib_failover_waitq;
	atomic_t		kib_nthreads;	/* # live threads */
	/* stabilize net/dev/peer/conn ops */
	rwlock_t		kib_global_lock;
	/* hash table of all my known peers */
	struct list_head		*kib_peers;
	/* size of kib_peers */
	int			kib_peer_hash_size;
	/* the connd task (serialisation assertions) */
	void			*kib_connd;
	/* connections to setup/teardown */
	struct list_head		kib_connd_conns;
	/* connections with zero refcount */
	struct list_head		kib_connd_zombies;
	/* connection daemon sleeps here */
	wait_queue_head_t		kib_connd_waitq;
	spinlock_t		kib_connd_lock;	/* serialise */
	struct ib_qp_attr	kib_error_qpa;	/* QP->ERROR */
	/* percpt data for schedulers */
	struct kib_sched_info	**kib_scheds;
} kib_data_t;

#define IBLND_INIT_NOTHING	 0
#define IBLND_INIT_DATA	    1
#define IBLND_INIT_ALL	     2

/************************************************************************
 * IB Wire message format.
 * These are sent in sender's byte order (i.e. receiver flips).
 */
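
/* Sketch of what "receiver flips" means in practice: a receiver can
 * detect a byte-swapped peer by comparing ibm_magic with both
 * IBLND_MSG_MAGIC and its byte-swapped form, then swab the remaining
 * fields accordingly; see kiblnd_unpack_msg(). */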

typedef struct kib_connparams {
	__u16	     ibcp_queue_depth;
	__u16	     ibcp_max_frags;
	__u32	     ibcp_max_msg_size;
} WIRE_ATTR kib_connparams_t;

typedef struct {
	lnet_hdr_t	ibim_hdr;	     /* portals header */
	char	      ibim_payload[0];      /* piggy-backed payload */
} WIRE_ATTR kib_immediate_msg_t;

typedef struct {
	__u32	     rf_nob;	       /* # bytes this frag */
	__u64	     rf_addr;	      /* CAVEAT EMPTOR: misaligned!! */
} WIRE_ATTR kib_rdma_frag_t;

typedef struct {
	__u32	     rd_key;	       /* local/remote key */
	__u32	     rd_nfrags;	    /* # fragments */
	kib_rdma_frag_t   rd_frags[0];	  /* buffer frags */
} WIRE_ATTR kib_rdma_desc_t;

typedef struct {
	lnet_hdr_t	ibprm_hdr;	    /* portals header */
	__u64	     ibprm_cookie;	 /* opaque completion cookie */
} WIRE_ATTR kib_putreq_msg_t;

typedef struct {
	__u64	     ibpam_src_cookie;     /* reflected completion cookie */
	__u64	     ibpam_dst_cookie;     /* opaque completion cookie */
	kib_rdma_desc_t   ibpam_rd;	     /* sender's sink buffer */
} WIRE_ATTR kib_putack_msg_t;

typedef struct {
	lnet_hdr_t	ibgm_hdr;	     /* portals header */
	__u64	     ibgm_cookie;	  /* opaque completion cookie */
	kib_rdma_desc_t   ibgm_rd;	      /* rdma descriptor */
} WIRE_ATTR kib_get_msg_t;

typedef struct {
	__u64	     ibcm_cookie;	  /* opaque completion cookie */
	__s32	     ibcm_status;	  /* < 0 failure; >= 0 length */
} WIRE_ATTR kib_completion_msg_t;

typedef struct {
	/* First 2 fields fixed FOR ALL TIME */
	__u32	     ibm_magic;	    /* I'm an ibnal message */
	__u16	     ibm_version;	  /* this is my version number */

	__u8	      ibm_type;	     /* msg type */
	__u8	      ibm_credits;	  /* returned credits */
	__u32	     ibm_nob;	      /* # bytes in whole message */
	__u32	     ibm_cksum;	    /* checksum (0 == no checksum) */
	__u64	     ibm_srcnid;	   /* sender's NID */
	__u64	     ibm_srcstamp;	 /* sender's incarnation */
	__u64	     ibm_dstnid;	   /* destination's NID */
	__u64	     ibm_dststamp;	 /* destination's incarnation */

	union {
		kib_connparams_t      connparams;
		kib_immediate_msg_t   immediate;
		kib_putreq_msg_t      putreq;
		kib_putack_msg_t      putack;
		kib_get_msg_t	 get;
		kib_completion_msg_t  completion;
	} WIRE_ATTR ibm_u;
} WIRE_ATTR kib_msg_t;

#define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC	/* unique magic */

#define IBLND_MSG_VERSION_1	 0x11
#define IBLND_MSG_VERSION_2	 0x12
#define IBLND_MSG_VERSION	   IBLND_MSG_VERSION_2

#define IBLND_MSG_CONNREQ	   0xc0	/* connection request */
#define IBLND_MSG_CONNACK	   0xc1	/* connection acknowledge */
#define IBLND_MSG_NOOP	      0xd0	/* nothing (just credits) */
#define IBLND_MSG_IMMEDIATE	 0xd1	/* immediate */
#define IBLND_MSG_PUT_REQ	   0xd2	/* putreq (src->sink) */
#define IBLND_MSG_PUT_NAK	   0xd3	/* completion (sink->src) */
#define IBLND_MSG_PUT_ACK	   0xd4	/* putack (sink->src) */
#define IBLND_MSG_PUT_DONE	  0xd5	/* completion (src->sink) */
#define IBLND_MSG_GET_REQ	   0xd6	/* getreq (sink->src) */
#define IBLND_MSG_GET_DONE	  0xd7	/* completion (src->sink: all OK) */

typedef struct {
	__u32	    ibr_magic;	     /* sender's magic */
	__u16	    ibr_version;	   /* sender's version */
	__u8	     ibr_why;	       /* reject reason */
	__u8	     ibr_padding;	   /* padding */
	__u64	    ibr_incarnation;       /* incarnation of peer */
	kib_connparams_t ibr_cp;		/* connection parameters */
} WIRE_ATTR kib_rej_t;

/* connection rejection reasons */
#define IBLND_REJECT_CONN_RACE       1	  /* You lost connection race */
#define IBLND_REJECT_NO_RESOURCES    2	  /* Out of memory/conns etc */
#define IBLND_REJECT_FATAL	   3	  /* Anything else */

#define IBLND_REJECT_CONN_UNCOMPAT   4	  /* incompatible version peer */
#define IBLND_REJECT_CONN_STALE      5	  /* stale peer */

#define IBLND_REJECT_RDMA_FRAGS      6	  /* Fatal: peer's rdma frags can't match mine */
#define IBLND_REJECT_MSG_QUEUE_SIZE  7	  /* Fatal: peer's msg queue size can't match mine */

/***********************************************************************/

typedef struct kib_rx			   /* receive message */
{
	struct list_head		rx_list;      /* queue for attention */
	struct kib_conn	  *rx_conn;      /* owning conn */
	int		       rx_nob;       /* # bytes received (-1 while posted) */
	enum ib_wc_status	 rx_status;    /* completion status */
	kib_msg_t		*rx_msg;       /* message buffer (host vaddr) */
	__u64		     rx_msgaddr;   /* message buffer (I/O addr) */
	DECLARE_PCI_UNMAP_ADDR   (rx_msgunmap); /* for dma_unmap_single() */
	struct ib_recv_wr	 rx_wrq;       /* receive work item... */
	struct ib_sge	     rx_sge;       /* ...and its memory */
} kib_rx_t;

#define IBLND_POSTRX_DONT_POST    0	     /* don't post */
#define IBLND_POSTRX_NO_CREDIT    1	     /* post: no credits */
#define IBLND_POSTRX_PEER_CREDIT  2	     /* post: give peer back 1 credit */
#define IBLND_POSTRX_RSRVD_CREDIT 3	     /* post: give myself back 1 reserved credit */

typedef struct kib_tx			   /* transmit message */
{
	struct list_head		tx_list;      /* queue on idle_txs ibc_tx_queue etc. */
	kib_tx_pool_t	    *tx_pool;      /* pool I'm from */
	struct kib_conn	  *tx_conn;      /* owning conn */
	short		     tx_sending;   /* # tx callbacks outstanding */
	short		     tx_queued;    /* queued for sending */
	short		     tx_waiting;   /* waiting for peer */
	int		       tx_status;    /* LNET completion status */
	unsigned long	     tx_deadline;  /* completion deadline */
	__u64		     tx_cookie;    /* completion cookie */
	lnet_msg_t	       *tx_lntmsg[2]; /* lnet msgs to finalize on completion */
	kib_msg_t		*tx_msg;       /* message buffer (host vaddr) */
	__u64		     tx_msgaddr;   /* message buffer (I/O addr) */
	DECLARE_PCI_UNMAP_ADDR   (tx_msgunmap); /* for dma_unmap_single() */
	int		       tx_nwrq;      /* # send work items */
	struct ib_send_wr	*tx_wrq;       /* send work items... */
	struct ib_sge	    *tx_sge;       /* ...and their memory */
	kib_rdma_desc_t	  *tx_rd;	/* rdma descriptor */
	int		       tx_nfrags;    /* # entries in... */
	struct scatterlist       *tx_frags;     /* dma_map_sg descriptor */
	__u64		    *tx_pages;     /* rdma phys page addrs */
	union {
		kib_phys_mr_t      *pmr;	/* MR for physical buffer */
		kib_fmr_t	   fmr;	/* FMR */
	}			 tx_u;
	int		       tx_dmadir;    /* dma direction */
} kib_tx_t;

typedef struct kib_connvars {
	/* connection-in-progress variables */
	kib_msg_t		 cv_msg;
} kib_connvars_t;

typedef struct kib_conn {
	struct kib_sched_info *ibc_sched;	/* scheduler information */
	struct kib_peer     *ibc_peer;	  /* owning peer */
	kib_hca_dev_t       *ibc_hdev;	  /* HCA bound on */
	struct list_head	   ibc_list;	  /* stash on peer's conn list */
	struct list_head	   ibc_sched_list;    /* schedule for attention */
	__u16		ibc_version;       /* version of connection */
	__u64		ibc_incarnation;   /* which instance of the peer */
	atomic_t	 ibc_refcount;      /* # users */
	int		  ibc_state;	 /* what's happening */
	int		  ibc_nsends_posted; /* # uncompleted sends */
	int		  ibc_noops_posted;  /* # uncompleted NOOPs */
	int		  ibc_credits;       /* # credits I have */
	int		  ibc_outstanding_credits; /* # credits to return */
	int		  ibc_reserved_credits;/* # ACK/DONE msg credits */
	int		  ibc_comms_error;   /* set on comms error */
	unsigned int	     ibc_nrx:16;	/* receive buffers owned */
	unsigned int	     ibc_scheduled:1;   /* scheduled for attention */
	unsigned int	     ibc_ready:1;       /* CQ callback fired */
	/* time of last send */
	unsigned long	ibc_last_send;
	/** link chain for kiblnd_check_conns only */
	struct list_head	   ibc_connd_list;
	/** rxs completed before ESTABLISHED */
	struct list_head	   ibc_early_rxs;
	/** IBLND_MSG_NOOPs for IBLND_MSG_VERSION_1 */
	struct list_head	   ibc_tx_noops;
	struct list_head	   ibc_tx_queue;       /* sends that need a credit */
	struct list_head	   ibc_tx_queue_nocred;/* sends that don't need a credit */
	struct list_head	   ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */
	struct list_head	   ibc_active_txs;     /* active tx awaiting completion */
	spinlock_t	     ibc_lock;		 /* serialise */
	kib_rx_t	    *ibc_rxs;	    /* the rx descs */
	kib_pages_t	 *ibc_rx_pages;       /* premapped rx msg pages */

	struct rdma_cm_id   *ibc_cmid;	   /* CM id */
	struct ib_cq	*ibc_cq;	     /* completion queue */

	kib_connvars_t      *ibc_connvars;       /* in-progress connection state */
} kib_conn_t;

#define IBLND_CONN_INIT	       0	 /* being initialised */
#define IBLND_CONN_ACTIVE_CONNECT     1	 /* active sending req */
#define IBLND_CONN_PASSIVE_WAIT       2	 /* passive waiting for rtu */
#define IBLND_CONN_ESTABLISHED	3	 /* connection established */
#define IBLND_CONN_CLOSING	    4	 /* being closed */
#define IBLND_CONN_DISCONNECTED       5	 /* disconnected */

typedef struct kib_peer {
	struct list_head	   ibp_list;	   /* stash on global peer list */
	lnet_nid_t	   ibp_nid;	    /* who's on the other end(s) */
	lnet_ni_t	   *ibp_ni;	     /* LNet interface */
	atomic_t	 ibp_refcount;       /* # users */
	struct list_head	   ibp_conns;	  /* all active connections */
	struct list_head	   ibp_tx_queue;       /* msgs waiting for a conn */
	__u16		ibp_version;	/* version of peer */
	__u64		ibp_incarnation;    /* incarnation of peer */
	int		  ibp_connecting;     /* current active connection attempts */
	int		  ibp_accepting;      /* current passive connection attempts */
	int		  ibp_error;	  /* errno on closing this peer */
	unsigned long	   ibp_last_alive;     /* when (in jiffies) I was last alive */
} kib_peer_t;

extern kib_data_t      kiblnd_data;

extern void kiblnd_hdev_destroy(kib_hca_dev_t *hdev);

static inline void
kiblnd_hdev_addref_locked(kib_hca_dev_t *hdev)
{
	LASSERT (atomic_read(&hdev->ibh_ref) > 0);
	atomic_inc(&hdev->ibh_ref);
}

static inline void
kiblnd_hdev_decref(kib_hca_dev_t *hdev)
{
	LASSERT (atomic_read(&hdev->ibh_ref) > 0);
	if (atomic_dec_and_test(&hdev->ibh_ref))
		kiblnd_hdev_destroy(hdev);
}

static inline int
kiblnd_dev_can_failover(kib_dev_t *dev)
{
	if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */
		return 0;

	if (*kiblnd_tunables.kib_dev_failover == 0) /* disabled */
		return 0;

	if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */
		return 1;

	return dev->ibd_can_failover;
}

#define kiblnd_conn_addref(conn)				\
do {							    \
	CDEBUG(D_NET, "conn[%p] (%d)++\n",		      \
	       (conn), atomic_read(&(conn)->ibc_refcount)); \
	atomic_inc(&(conn)->ibc_refcount);		  \
} while (0)

#define kiblnd_conn_decref(conn)					\
do {									\
	unsigned long flags;						\
									\
	CDEBUG(D_NET, "conn[%p] (%d)--\n",				\
	       (conn), atomic_read(&(conn)->ibc_refcount));		\
	LASSERT_ATOMIC_POS(&(conn)->ibc_refcount);			\
	if (atomic_dec_and_test(&(conn)->ibc_refcount)) {		\
		spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);	\
		list_add_tail(&(conn)->ibc_list,			\
				  &kiblnd_data.kib_connd_zombies);	\
		wake_up(&kiblnd_data.kib_connd_waitq);		\
		spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\
	}								\
} while (0)

#define kiblnd_peer_addref(peer)				\
do {							    \
	CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n",		\
	       (peer), libcfs_nid2str((peer)->ibp_nid),	 \
	       atomic_read (&(peer)->ibp_refcount));	\
	atomic_inc(&(peer)->ibp_refcount);		  \
} while (0)

#define kiblnd_peer_decref(peer)				\
do {							    \
	CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n",		\
	       (peer), libcfs_nid2str((peer)->ibp_nid),	 \
	       atomic_read (&(peer)->ibp_refcount));	\
	LASSERT_ATOMIC_POS(&(peer)->ibp_refcount);	      \
	if (atomic_dec_and_test(&(peer)->ibp_refcount))     \
		kiblnd_destroy_peer(peer);		      \
} while (0)

static inline struct list_head *
kiblnd_nid2peerlist (lnet_nid_t nid)
{
	unsigned int hash =
		((unsigned int)nid) % kiblnd_data.kib_peer_hash_size;

	return (&kiblnd_data.kib_peers [hash]);
}

static inline int
kiblnd_peer_active (kib_peer_t *peer)
{
	/* Am I in the peer hash table? */
	return (!list_empty(&peer->ibp_list));
}

static inline kib_conn_t *
kiblnd_get_conn_locked (kib_peer_t *peer)
{
	LASSERT (!list_empty(&peer->ibp_conns));

	/* just return the first connection */
	return list_entry(peer->ibp_conns.next, kib_conn_t, ibc_list);
}

static inline int
kiblnd_send_keepalive(kib_conn_t *conn)
{
	return (*kiblnd_tunables.kib_keepalive > 0) &&
		cfs_time_after(jiffies, conn->ibc_last_send +
			       *kiblnd_tunables.kib_keepalive*HZ);
}

static inline int
kiblnd_need_noop(kib_conn_t *conn)
{
	LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);

	if (conn->ibc_outstanding_credits <
	    IBLND_CREDITS_HIGHWATER(conn->ibc_version) &&
	    !kiblnd_send_keepalive(conn))
		return 0; /* No need to send NOOP */

	if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
		if (!list_empty(&conn->ibc_tx_queue_nocred))
			return 0; /* NOOP can be piggybacked */

		/* No tx to piggyback NOOP onto or no credit to send a tx */
		return (list_empty(&conn->ibc_tx_queue) ||
			conn->ibc_credits == 0);
	}

	if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */
	    !list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */
	    conn->ibc_credits == 0)		    /* no credit */
		return 0;

	if (conn->ibc_credits == 1 &&      /* last credit reserved for */
	    conn->ibc_outstanding_credits == 0) /* giving back credits */
		return 0;

	/* No tx to piggyback NOOP onto or no credit to send a tx */
	return (list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1);
}

static inline void
kiblnd_abort_receives(kib_conn_t *conn)
{
	ib_modify_qp(conn->ibc_cmid->qp,
		     &kiblnd_data.kib_error_qpa, IB_QP_STATE);
}

static inline const char *
kiblnd_queue2str (kib_conn_t *conn, struct list_head *q)
{
	if (q == &conn->ibc_tx_queue)
		return "tx_queue";

	if (q == &conn->ibc_tx_queue_rsrvd)
		return "tx_queue_rsrvd";

	if (q == &conn->ibc_tx_queue_nocred)
		return "tx_queue_nocred";

	if (q == &conn->ibc_active_txs)
		return "active_txs";

	LBUG();
	return NULL;
}

/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the
 * lowest bits of the work request id to stash the work item type. */

#define IBLND_WID_TX    0
#define IBLND_WID_RDMA  1
#define IBLND_WID_RX    2
#define IBLND_WID_MASK  3UL

static inline __u64
kiblnd_ptr2wreqid (void *ptr, int type)
{
	unsigned long lptr = (unsigned long)ptr;

	LASSERT ((lptr & IBLND_WID_MASK) == 0);
	LASSERT ((type & ~IBLND_WID_MASK) == 0);
	return (__u64)(lptr | type);
}

static inline void *
kiblnd_wreqid2ptr (__u64 wreqid)
{
	return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK);
}

static inline int
kiblnd_wreqid2type (__u64 wreqid)
{
	return (wreqid & IBLND_WID_MASK);
}
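
/* Example usage (illustrative sketch): decoding a work completion in a
 * CQ handler.
 *
 *	switch (kiblnd_wreqid2type(wc->wr_id)) {
 *	case IBLND_WID_RX:
 *		rx = kiblnd_wreqid2ptr(wc->wr_id);
 *		break;
 *	case IBLND_WID_TX:
 *	case IBLND_WID_RDMA:
 *		tx = kiblnd_wreqid2ptr(wc->wr_id);
 *		break;
 *	}
 */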

static inline void
kiblnd_set_conn_state (kib_conn_t *conn, int state)
{
	conn->ibc_state = state;
	mb();
}

static inline void
kiblnd_init_msg (kib_msg_t *msg, int type, int body_nob)
{
	msg->ibm_type = type;
	msg->ibm_nob  = offsetof(kib_msg_t, ibm_u) + body_nob;
}

static inline int
kiblnd_rd_size (kib_rdma_desc_t *rd)
{
	int   i;
	int   size;

	for (i = size = 0; i < rd->rd_nfrags; i++)
		size += rd->rd_frags[i].rf_nob;

	return size;
}

static inline __u64
kiblnd_rd_frag_addr(kib_rdma_desc_t *rd, int index)
{
	return rd->rd_frags[index].rf_addr;
}

static inline __u32
kiblnd_rd_frag_size(kib_rdma_desc_t *rd, int index)
{
	return rd->rd_frags[index].rf_nob;
}

static inline __u32
kiblnd_rd_frag_key(kib_rdma_desc_t *rd, int index)
{
	return rd->rd_key;
}

static inline int
kiblnd_rd_consume_frag(kib_rdma_desc_t *rd, int index, __u32 nob)
{
	if (nob < rd->rd_frags[index].rf_nob) {
		rd->rd_frags[index].rf_addr += nob;
		rd->rd_frags[index].rf_nob  -= nob;
	} else {
		index++;
	}

	return index;
}

static inline int
kiblnd_rd_msg_size(kib_rdma_desc_t *rd, int msgtype, int n)
{
	LASSERT (msgtype == IBLND_MSG_GET_REQ ||
		 msgtype == IBLND_MSG_PUT_ACK);

	return msgtype == IBLND_MSG_GET_REQ ?
	       offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]) :
	       offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]);
}


static inline __u64
kiblnd_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
	return ib_dma_mapping_error(dev, dma_addr);
}

static inline __u64 kiblnd_dma_map_single(struct ib_device *dev,
					  void *msg, size_t size,
					  enum dma_data_direction direction)
{
	return ib_dma_map_single(dev, msg, size, direction);
}

static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
					   __u64 addr, size_t size,
					  enum dma_data_direction direction)
{
	ib_dma_unmap_single(dev, addr, size, direction);
}

#define KIBLND_UNMAP_ADDR_SET(p, m, a)  do {} while (0)
#define KIBLND_UNMAP_ADDR(p, m, a)      (a)
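
/* These are no-ops here: the DMA address is passed back explicitly to
 * kiblnd_dma_unmap_single(), so nothing needs to be saved in the
 * rx_msgunmap/tx_msgunmap slots. */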

static inline int kiblnd_dma_map_sg(struct ib_device *dev,
				    struct scatterlist *sg, int nents,
				    enum dma_data_direction direction)
{
	return ib_dma_map_sg(dev, sg, nents, direction);
}

static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
				       struct scatterlist *sg, int nents,
				       enum dma_data_direction direction)
{
	ib_dma_unmap_sg(dev, sg, nents, direction);
}

static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
					  struct scatterlist *sg)
{
	return ib_sg_dma_address(dev, sg);
}

static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
					     struct scatterlist *sg)
{
	return ib_sg_dma_len(dev, sg);
}

/* XXX We use KIBLND_CONN_PARAM(e) as a writable buffer.  This is not
 * strictly right because OFED 1.2 declares it const, so a (void *) cast
 * is needed to cast away the const. */

#define KIBLND_CONN_PARAM(e)	    ((e)->param.conn.private_data)
#define KIBLND_CONN_PARAM_LEN(e)	((e)->param.conn.private_data_len)


struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev,
				    kib_rdma_desc_t *rd);
struct ib_mr *kiblnd_find_dma_mr(kib_hca_dev_t *hdev,
				 __u64 addr, __u64 size);
void kiblnd_map_rx_descs(kib_conn_t *conn);
void kiblnd_unmap_rx_descs(kib_conn_t *conn);
int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx,
		  kib_rdma_desc_t *rd, int nfrags);
void kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx);
void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps);

int  kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages,
			 int npages, __u64 iov, kib_fmr_t *fmr);
void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);

int  kiblnd_pmr_pool_map(kib_pmr_poolset_t *pps, kib_hca_dev_t *hdev,
			 kib_rdma_desc_t *rd, __u64 *iova, kib_phys_mr_t **pp_pmr);
void kiblnd_pmr_pool_unmap(kib_phys_mr_t *pmr);

int  kiblnd_startup (lnet_ni_t *ni);
void kiblnd_shutdown (lnet_ni_t *ni);
int  kiblnd_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg);
void kiblnd_query (struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when);

int  kiblnd_tunables_init(void);
void kiblnd_tunables_fini(void);

int  kiblnd_connd (void *arg);
int  kiblnd_scheduler(void *arg);
int  kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name);
int  kiblnd_failover_thread (void *arg);

int  kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages);
void kiblnd_free_pages (kib_pages_t *p);

int  kiblnd_cm_callback(struct rdma_cm_id *cmid,
			struct rdma_cm_event *event);
int  kiblnd_translate_mtu(int value);

int  kiblnd_dev_failover(kib_dev_t *dev);
int  kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
void kiblnd_destroy_peer (kib_peer_t *peer);
void kiblnd_destroy_dev (kib_dev_t *dev);
void kiblnd_unlink_peer_locked (kib_peer_t *peer);
void kiblnd_peer_alive (kib_peer_t *peer);
kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid);
void kiblnd_peer_connect_failed (kib_peer_t *peer, int active, int error);
int  kiblnd_close_stale_conns_locked (kib_peer_t *peer,
				      int version, __u64 incarnation);
int  kiblnd_close_peer_conns_locked (kib_peer_t *peer, int why);

void kiblnd_connreq_done(kib_conn_t *conn, int status);
kib_conn_t *kiblnd_create_conn (kib_peer_t *peer, struct rdma_cm_id *cmid,
				int state, int version);
void kiblnd_destroy_conn (kib_conn_t *conn);
void kiblnd_close_conn (kib_conn_t *conn, int error);
void kiblnd_close_conn_locked (kib_conn_t *conn, int error);

int  kiblnd_init_rdma (kib_conn_t *conn, kib_tx_t *tx, int type,
		       int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie);

void kiblnd_launch_tx (lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid);
void kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn);
void kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn);
void kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob);
void kiblnd_txlist_done (lnet_ni_t *ni, struct list_head *txlist,
			 int status);
void kiblnd_check_sends (kib_conn_t *conn);

void kiblnd_qp_event(struct ib_event *event, void *arg);
void kiblnd_cq_event(struct ib_event *event, void *arg);
void kiblnd_cq_completion(struct ib_cq *cq, void *arg);

void kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg, int version,
		      int credits, lnet_nid_t dstnid, __u64 dststamp);
int  kiblnd_unpack_msg(kib_msg_t *msg, int nob);
int  kiblnd_post_rx (kib_rx_t *rx, int credit);

int  kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
int  kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
		 unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
		 unsigned int offset, unsigned int mlen, unsigned int rlen);