<?xml version="1.0" encoding="UTF-8"?>
<?oxygen RNGSchema="http://docbook.org/xml/5.0/rng/docbookxi.rng" type="xml"?>
<?oxygen SCHSchema="http://docbook.org/xml/5.0/rng/docbookxi.rng"?>
<chapter xmlns="http://docbook.org/ns/docbook" xmlns:xi="http://www.w3.org/2001/XInclude"
    xmlns:xlink="http://www.w3.org/1999/xlink" version="5.0">
    <?dbhtml filename="Tutorial 04.html" ?>
    <title>Objects at Rest</title>
    <para>Thus far, we have seen very flat things. Namely, a single triangle. Maybe the triangle
        moved around or had some colors.</para>
    <para>This tutorial is all about how to create a realistic world of objects.</para>
    <section>
        <title>The Unreal World</title>
        <para>The <phrase role="propername">Orthographic Cube</phrase> tutorial renders a
            rectangular prism (a 3D rectangle). The dimensions of the prism are 0.5x0.5x1.5, so it
            is longer in the Z axis by 3x the X and Y.</para>
        <para>The code in this tutorial should be familiar, for the most part. We simply draw 12
            triangles rather than one. The rectangular faces of the prism are made of 2 triangles,
            splitting the face along one of the diagonals.</para>
        <para>The vertices also have a color. However, the color for the 6 vertices that make up a
            face is always the same; this gives each face a single, uniform color.</para>
        <para>The vertex shader is a combination of things we know. It passes a color through to the
            fragment stage, but it also takes a <type>vec2</type> offset uniform and adds that
            offset to the X and Y components of the position. The fragment shader simply takes the
            interpolated color and uses it as the output color.</para>
        <section xml:id="tut04_face_culling">
            <title>Face Culling</title>
            <para>There is one very noteworthy code change, however: the initialization routine. It
                has a few new functions that need to be discussed.</para>
            <example>
                <title>Face Culling Initialization</title>
                <programlisting language="cpp"><![CDATA[void init()
{
    InitializeProgram();
    InitializeVertexBuffer();
    
    glGenVertexArrays(1, &vao);
    glBindVertexArray(vao);
    
    glEnable(GL_CULL_FACE);
    glCullFace(GL_BACK);
    glFrontFace(GL_CW);
}]]></programlisting>
            </example>
            <para>The last three lines are new.</para>
            <para>The <function>glEnable</function> function is a multi-purpose tool. There are a
                lot of binary on/off flags that are part of OpenGL's state.
                    <function>glEnable</function> is used to set these flags to the
                    <quote>on</quote> position. Similarly, there is a <function>glDisable</function>
                function that sets the flag to <quote>off.</quote></para>
            <para>The <literal>GL_CULL_FACE</literal> flag, when enabled, tells OpenGL to activate
                    <glossterm>face culling</glossterm>. Up until now, we have been rendering with
                face culling off.</para>
            <para>Face culling is a useful feature for saving performance. Take our rectangular
                prism, for example. Pick up a remote control; its general shape is that of a
                rectangular prism. No matter how you look at it or orient it, you can never see more
                than 3 sides of it at once. So why bother spending all that fragment processing time
                drawing the other three sides?</para>
            <para>Face culling is a way of telling OpenGL not to draw the sides of an object that
                you cannot see. It is quite simple, really.</para>
            <para>In window space, after the transform from normalized device coordinates, you have
                a triangle. Each vertex of that triangle was presented to OpenGL in a specific
                order. This gives you a way of numbering the vertices of the triangle.</para>
            <para>No matter what size or shape the triangle is, you can classify the ordering of a
                triangle in two ways: clockwise or counter-clockwise. That is, if the order of the
                vertices from 1 to 2 to 3 moves clockwise in a circle, relative to the triangle's
                center, then the triangle is facing clockwise relative to the viewer. Otherwise, the
                triangle is counter-clockwise relative to the viewer. This ordering is called the
                    <glossterm>winding order.</glossterm></para>
            <figure>
                <title>Triangle Winding Order</title>
                <mediaobject>
                    <imageobject>
                        <imagedata  fileref="WindingOrder.svg" contentwidth="6in"/>
                    </imageobject>
                </mediaobject>
                <caption>
                    <para>The left triangle has a clockwise winding order; the triangle on the right
                        has a counter-clockwise winding order.</para>
                </caption>
            </figure>
            <para>Face culling in OpenGL works based on this ordering. Setting this is a two-step
                process, and is accomplished by the last two statements of the initialization
                function.</para>
            <para>The <function>glFrontFace</function> function defines which winding order,
                clockwise or counter-clockwise, is considered to be the <quote>front</quote> side of
                the triangle. This function can be given either <literal>GL_CW</literal> or
                    <literal>GL_CCW</literal>, for clockwise and counter-clockwise
                respectively.</para>
            <para>The <function>glCullFace</function> function defines which side, front or back,
                gets culled. This can be given <literal>GL_BACK</literal>,
                    <literal>GL_FRONT</literal>, or <literal>GL_FRONT_AND_BACK</literal>. The latter
                culls <emphasis>everything,</emphasis> so no triangles are rendered. This can be
                useful for measuring vertex shader performance but is less useful for actually
                drawing anything.</para>
            <para>The triangle data in the tutorial is specifically ordered so that the clockwise
                side of each triangle faces outward. This prevents the drawing of the rear-facing
                faces.</para>
        </section>
        <section>
            <title>Lack of Perspective</title>
            <para>So, the image looks like this:</para>
            <figure>
                <title>Orthographic Prism</title>
                <mediaobject>
                    <imageobject>
                        <imagedata fileref="OrthoPrism.png" contentwidth="3in"/>
                    </imageobject>
                </mediaobject>
            </figure>
            <para>There's something wrong with this. Namely, that it looks like a square.</para>
            <para>Pick up a remote control again. Point it directly at your eye and position it so
                that it is in the center of your vision. You should only be able to see the front
                panel of the remote.</para>
            <para>Now, move it to the right and up, similar to where the square is. You should be
                able to see the bottom and left side of the remote.</para>
            <para>So we should be able to see the bottom and left side of our rectangular prism. But
                we cannot. Why not?</para>
            <para>Think back to how rendering happens. In clip-space, the vertices of the back end
                of the rectangular prism are directly behind the front end. And when we transform
                these into window coordinates, the back vertices are still directly behind the front
                vertices. This is what the rasterizer sees, so this is what the rasterizer
                renders.</para>
            <para>There has to be something that reality is doing that we are not. That something is
                called <quote>perspective.</quote></para>
        </section>
    </section>
    <section xml:id="ShaderPerspective">
        <?dbhtml filename="Tut04 Perspective Projection.html" ?>
        <title>Perspective Projection</title>
        <para>A <glossterm>projection</glossterm>, for the purposes of rendering, is a way to
            transform a world from one dimensionality to another. Our destination image is two
            dimensional, and our initial world is three dimensional. Thus, we need a way to
            transform this 3D world into a 2D one.</para>
        <para>Finite projections, which are the ones we are interested in, only project objects onto
            a finite space of the lower dimensionality. For a 3D to 2D projection, there is a finite
            plane on which the world is projected. For 2D to 1D, there is a bounded line that is the
            result of the projection.</para>
        <para>An <glossterm>orthographic projection</glossterm> is a very simplistic projection.
            When projecting onto an axis-aligned surface, as above, the projection simply involves
            throwing away the coordinate perpendicular to the surface.</para>
        <figure>
            <title>2D to 1D Orthographic Projection</title>
            <mediaobject>
                <imageobject>
                    <imagedata fileref="Ortho2DProjection.svg"  contentwidth="4in"/>
                </imageobject>
            </mediaobject>
            <caption>
                <para>A scene orthographically projected onto the black line. The gray box
                    represents the part of the world that is visible to the projection; parts of the
                    scene outside of this region are not seen.</para>
            </caption>
        </figure>
        <para>When projecting onto an arbitrary line, the math is a bit more complicated. But what
            makes it an orthographic projection is that the dimension perpendicular to the surface
            is removed uniformly to create the projection. The fact that it is a projection in the
            direction of the perpendicular and that it is uniform is what makes it
            orthographic.</para>
        <para>Human eyes do not see the world via orthographic projection. If they did, you would
            only be able to see an area of the world the size of your pupils. Because we do not use
            orthographic projections to see (among other reasons), orthographic projections do not look
            particularly real to us.</para>
        <para>Instead, we use a pinhole camera model for our eyesight. This model performs a
                <glossterm>perspective projection</glossterm>. A perspective projection is a
            projection of the world on a surface as though seen through a single point. A 2D to 1D
            perspective projection looks like this:</para>
        <figure>
            <title>2D to 1D Perspective Projection</title>
            <mediaobject>
                <imageobject>
                    <imagedata fileref="Persp2DProjection.svg"  contentwidth="4in"/>
                </imageobject>
            </mediaobject>
        </figure>
        <para>As you can see, the projection is radial, based on the location of a particular point.
            That point is the eye or camera of the projection.</para>
        <para>Just from the shape of the projection, we can see that the perspective projection
            causes a larger field of geometry to be projected onto the surface. An orthographic
            projection only captures the rectangular prism directly in front of the surface of
            projection. A perspective projection captures a larger space of the world.</para>
        <para>In 2D, the shape of the perspective projection is a regular trapezoid (a quadrilateral
            that has only one pair of parallel sides, and the other pair of sides have equal but
            opposite slopes). In 3D, the shape is called a <glossterm>frustum</glossterm>;
            essentially, a pyramid with the tip chopped off.</para>
        <figure>
            <title>Viewing Frustum</title>
            <mediaobject>
                <imageobject>
                    <imagedata fileref="ViewFrustum.svg"  contentwidth="4in"/>
                </imageobject>
            </mediaobject>
        </figure>
        <section>
            <title>Mathematical Perspective</title>
            <para>Now that we know what we want to do, we just need to know how to do it.</para>
            <para>We will be making a few simplifying assumptions:</para>
            <itemizedlist>
                <listitem>
                    <para>The plane of projection is axis-aligned and faces down the -Z axis. Thus,
                        -Z is farther away from the plane of projection.</para>
                </listitem>
                <listitem>
                    <para>The eye point is fixed at the origin (0, 0, 0).</para>
                </listitem>
                <listitem>
                    <para>The size of the plane of projection will be [-1, 1]. All points that
                        project outside of this range are not drawn.</para>
                </listitem>
            </itemizedlist>
            <para>Yes, this sounds somewhat like normalized device coordinate space. No, that's not
                a coincidence. But let's not get ahead of ourselves.</para>
            <para>We know a few things about how the projection results will work. A perspective
                projection essentially shifts vertices towards the eye, based on the location of
                that particular vertex. Vertices farther in Z from the front of the projection are
                shifted less than those closer to the eye. And the shift also depends on how far the
                vertices are from the center of the plane of projection, in the X,Y
                direction.</para>
            <para>The problem is really just a simple geometry problem. Here is the equivalent form
                in a 2D to 1D perspective projection.</para>
            <figure>
                <title>2D to 1D Perspective Projection Diagram</title>
                <mediaobject>
                    <imageobject>
                        <imagedata fileref="PerspDiagram.svg"  contentwidth="4in"/>
                    </imageobject>
                </mediaobject>
                <caption>
                    <para>The projection of the point P onto the projection plane. This plane is at
                        an offset of E<subscript>z</subscript> compared to the eye point, which is
                        fixed at the origin. R is the projected point.</para>
                </caption>
            </figure>
            <para>What we have are two similar right triangles: the triangle formed by E, R and
                    E<subscript>z</subscript>, and the triangle formed by E, P, and
                    P<subscript>z</subscript>. We have the eye position and the position of the
                unprojected point. To find the location of R, we simply do this:</para>
            <equation>
                <title>Perspective Computation</title>
                <mediaobject>
                    <imageobject>
                        <imagedata fileref="PerspectiveFunc.svg" width="300" 
                            contentwidth="3in"/>
                    </imageobject>
                </mediaobject>
            </equation>
            <para>Since this is a vectorized function, this solution applies equally to 2D as to 3D.
                Thus, perspective projection is simply the task of applying that simple formula to
                every vertex that the vertex shader receives.</para>
        </section>
        <section>
            <title>The Perspective Divide</title>
            <para>The basic perspective projection function is simple. Really simple. Indeed, it is
                so simple that it has been built into graphics hardware since the days of the
                earliest 3Dfx card and even prior graphics hardware.</para>
            <para>You might notice that the scaling can be expressed as a division operation
                (multiplying by the reciprocal). And you may recall that the difference between clip
                space and normalized device coordinate space is a division by the W coordinate. So
                instead of doing the divide in the shader, we can simply set the W coordinate of
                each vertex correctly and let the hardware handle it.</para>
            <para>This step, the conversion from clip-space to normalized device coordinate space,
                has a particular name: the <glossterm>perspective divide</glossterm>. So named
                because it is usually used for perspective projections; orthographic projections
                tend to have the W coordinates be 1.0, thus making the perspective divide a
                no-op.</para>
            <note>
                <para>You may be wondering why this arbitrary division-by-W step exists. You may
                    also be wondering, in these modern days of vertex shaders that can do vector
                    divisions very quickly, why we should bother to use the hardware division-by-W
                    step at all. There are several reasons. One we will cover in just a bit when we deal
                    with matrices. More important ones will be covered in future tutorials. Suffice it to
                    say that there are very good reasons to put the perspective term in the W
                    coordinate of clip space vertices.</para>
            </note>
        </section>
        <section>
            <title>Camera Perspective</title>
            <para>Before we can actually implement perspective projection, we need to deal with a
                new issue. The orthographic projection transform was essentially a no-op. It is
                automatic, by the nature of how OpenGL uses the clip space vertices output by the
                vertex shader. The perspective projection transform is a bit more involved. Indeed,
                it fundamentally changes the nature of the world.</para>
            <para>Previously, we were dealing directly in clip space. If we are to use a perspective
                projection to transform vertices <emphasis>into</emphasis> clip space, rather than
                using clip-space vertices directly, we must first define the space that our
                vertices are in <emphasis>before</emphasis> the transform. This definition will help
                us to define how we do the perspective projection transformation.</para>
            <para>Thus, we define a new space called <glossterm>camera space.</glossterm> This is
                not a space that OpenGL recognizes; it is purely an arbitrary user construction.
                However, it can be useful to define a particular camera space based on what we know
                of our perspective projection. This minimizes the differences between camera space
                and the perspective form of clip space, and it can simplify our perspective
                projection logic.</para>
            <para>The volume of camera space will range from positive infinity to negative infinity
                in all directions. Positive X extends right, positive Y extends up, and positive Z
                is <emphasis>forward</emphasis>. The last one is a change from clip space, where
                positive Z is away.</para>
            <para>Our perspective projection transform will be specific to this space. As previously
                stated, the projection plane shall be a region [-1, 1] in the X and Y axes, and at a
                Z value of -1. The projection will be from vertices in the -Z direction onto this
                plane; vertices that have a positive Z value are behind the projection plane.</para>
            <para>Now, we will make one more simplifying assumption: the location of the center of
                the perspective plane is fixed at (0, 0, -1) in camera space. Therefore, since the
                projection plane is pointing down the -Z axis, the plane's location relative to the
                eye is (0, 0, -1). Thus, the E<subscript>z</subscript> value, the offset from the
                eye to the projection plane, is always -1. This means that our perspective
                term, when phrased as division rather than multiplication, is simply
                    P<subscript>z</subscript>/-1: the negation of the camera-space Z
                coordinate.</para>
            <para>Having a fixed eye position and projection plane makes it difficult to have
                zoom-in/zoom-out style effects. This would normally be done by moving the plane
                relative to the fixed eye point. There is a way to do this, however. All you need to
                do is, when transforming from camera space to clip space, scale all of the X and Y
                values by a constant. What this does is make the world, as the camera sees it,
                smaller or larger in the X and Y axes. It effectively makes the frustum wider or
                narrower.</para>
            <para>To compare, camera space and normalized device coordinate space (after the
                perspective divide) look like this, using a 2D version of a perspective
                projection:</para>
            <figure>
                <title>Camera to NDC Transformation in 2D</title>
                <mediaobject>
                    <imageobject>
                        <imagedata fileref="CameraToPerspective.svg"  contentwidth="6in"/>
                    </imageobject>
                </mediaobject>
            </figure>
            <para>Do note that this diagram has the Z axis flipped from camera space and normalized
                device coordinate (NDC) space. This is because camera space and NDC space have
                different viewing directions. In camera space, the camera looks down the -Z axis;
                more negative Z values are farther away. In NDC space, the camera looks down the +Z
                axis; more positive Z values are farther away. The diagram flips the axis so that
                the viewing direction can remain the same between the two images (up is
                away).</para>
            <para>If you perform an orthographic projection from NDC space on the right (by dropping
                the Z coordinate), then what you get is a perspective projection of the world on the
                left. In effect, what we have done is transform objects into a three-dimensional
                space from which an orthographic projection will look like a perspective one.</para>
        </section>
        <section>
            <title>Perspective in Depth</title>
            <para>So we know what to do with the X and Y coordinates. But what does the Z value mean
                in a perspective projection?</para>
            <para>Until the next tutorial, we are going to ignore the <emphasis>meaning</emphasis>
                of Z. Even so, we still need some kind of transform for it; if a vertex extends
                outside of the [-1, 1] box in any axis in normalized device coordinate (NDC) space,
                then it is outside of the viewing area. And because the Z coordinate undergoes the
                perspective divide just like the X and Y coordinates, we need to take this into
                account if we actually want to see anything in our projection.</para>
            <para>Our W coordinate will be based on the camera-space Z coordinate. We need to map Z
                values from the camera-space range [0, -∞) to the NDC space range [-1, 1]. Since
                camera space is an infinite range and we're trying to map to a finite range, we need
                to do some range bounding. The frustum is already finitely bound in the X and Y
                directions; we simply need to add a Z boundary.</para>
            <para>The maximum distance that a vertex can be before it is considered no longer in
                view is the <glossterm>camera zFar</glossterm>. We also have a minimum distance from
                the eye; this is called the <glossterm>camera zNear</glossterm>. This creates a
                finite frustum for our camera space viewpoint.</para>
            <note>
                <para>It is very important to remember that these are the zNear and zFar for the
                        <emphasis>camera</emphasis> space. The next tutorial will also introduce a
                    range of depth, also using the names zNear and zFar. This is a related but
                    fundamentally different range.</para>
            </note>
            <para>The camera zNear can appear to effectively determine the offset between the eye
                and the projection plane. However, this is not the case. Even if zNear is less than
                1, which would place the near Z plane <emphasis>behind</emphasis> the projection
                plane, you still get an effectively valid projection. Objects behind the plane can
                be projected onto the plane just as well as those in front of it; it is still a
                perspective projection. Mathematically, this works.</para>
            <para>What it does <emphasis>not</emphasis> do is what you would expect if you moved the
                plane of projection. Since the plane of projection has a fixed size (the range [-1,
                1]), moving the plane would alter where points appear in the projection. Changing
                the camera zNear does not affect the X, Y position of points in the
                projection.</para>
            <para>There are several ways to go about mapping one finite range to another. One
                confounding problem is the perspective divide itself; it is easy to perform a linear
                mapping between two finite spaces. It is quite another to do a mapping that remains
                linear <emphasis>after</emphasis> the perspective divide. Since we will be dividing
                by -Z itself (the camera-space Z, not the clip-space Z), the math is much more
                complex than you might expect.</para>
            <para>For reasons that will be better explained in the next tutorial, we will use this
                modestly complicated function to compute the clip-space Z:</para>
            <equation>
                <title>Depth Computation</title>
                <mediaobject>
                    <imageobject>
                        <imagedata fileref="ZValueFunc.svg" width="500"  contentwidth="4in"/>
                    </imageobject>
                </mediaobject>
            </equation>
            <para>Some important things about this equation and camera zNear/zFar. First, these
                values are <emphasis>positive</emphasis>; the equation accounts for this when
                performing the transformation. Also, zNear <emphasis>cannot</emphasis> be 0; it can
                be very close to zero, but it must never be exactly zero.</para>
            <para>Let us review the previous diagram of camera-to-NDC transformation in 2D
                space:</para>
            <informalfigure>
                <mediaobject>
                    <imageobject>
                        <imagedata fileref="CameraToPerspective.svg"  contentwidth="6in"/>
                    </imageobject>
                </mediaobject>
            </informalfigure>
            <para>The example of 2D camera-space vs. 2D NDC space uses this equation to compute the
                Z values. Take a careful look at how the Z coordinates match. The Z distances are
                evenly spaced in camera space, but in NDC space, they are non-linearly distributed.
                And yet simultaneously, points that are collinear in camera-space remain collinear
                in NDC space.</para>
            <para>This fact has some interesting properties that we will investigate further in the
                next tutorial.</para>
        </section>
        <section>
            <title>Drawing in Perspective</title>
            <para>Given all of the above, we now have a specific sequence of steps to transform a
                vertex from camera space to clip space. These steps are as follows:</para>
            <orderedlist>
                <listitem>
                    <para>Frustum adjustment: multiply the X and Y value of the camera-space
                        vertices by a constant.</para>
                </listitem>
                <listitem>
                    <para>Depth adjustment: modify the Z value from camera space to clip space, as
                        above.</para>
                </listitem>
                <listitem>
                    <para>Perspective division term: compute the W value, where
                            E<subscript>z</subscript> is -1.</para>
                </listitem>
            </orderedlist>
            <para>Now that we have all the theory down, we are ready to put things properly in
                perspective. This is done in the <phrase role="propername"
                    >ShaderPerspective</phrase> tutorial.</para>
            <para>Our new vertex shader, <filename>data\ManualPerspective.vert</filename> looks like
                this:</para>
            <example>
                <title>ManualPerspective Vertex Shader</title>
                <programlisting language="glsl"><![CDATA[#version 330

layout(location = 0) in vec4 position;
layout(location = 1) in vec4 color;

smooth out vec4 theColor;

uniform vec2 offset;
uniform float zNear;
uniform float zFar;
uniform float frustumScale;

void main()
{
    vec4 cameraPos = position + vec4(offset.x, offset.y, 0.0, 0.0);
    vec4 clipPos;
    
    clipPos.xy = cameraPos.xy * frustumScale;
    
    clipPos.z = cameraPos.z * (zNear + zFar) / (zNear - zFar);
    clipPos.z += 2 * zNear * zFar / (zNear - zFar);
    
    clipPos.w = -cameraPos.z;
    
    gl_Position = clipPos;
    theColor = color;
}]]></programlisting>
            </example>
            <para>We have a few new uniforms, but the code itself is only modestly complex.</para>
            <para>The first statement simply applies an offset, just like the vertex shaders we have
                seen before. It positions the object in camera space, so that it is offset from the
                center of the view. This is here to make it easier to position the object for
                projection.</para>
            <para>The next statement performs a scalar multiply of the camera-space X and Y
                positions, storing them in a temporary 4-dimensional vector. From there, we compute
                the clip Z position based on the formula discussed earlier.</para>
            <para>The W coordinate of the clip space position is the Z distance in camera space
                divided by the Z distance from the plane (at the origin) to the eye. The eye is
                fixed at (0, 0, -1), so this leaves us with the negation of the camera space Z
                position. OpenGL will automatically perform the division for us.</para>
            <para>After that, we simply store the clip space position where OpenGL needs it, store
                the color, and we're done. The fragment shader is unchanged.</para>
            <para>With all of the new uniforms, our program initialization routine has
                changed:</para>
            <example>
                <title>Program Initialization</title>
                <programlisting language="cpp"><![CDATA[offsetUniform = glGetUniformLocation(theProgram, "offset");

frustumScaleUnif = glGetUniformLocation(theProgram, "frustumScale");
zNearUnif = glGetUniformLocation(theProgram, "zNear");
zFarUnif = glGetUniformLocation(theProgram, "zFar");

glUseProgram(theProgram);
glUniform1f(frustumScaleUnif, 1.0f);
glUniform1f(zNearUnif, 1.0f);
glUniform1f(zFarUnif, 3.0f);
glUseProgram(0);]]></programlisting>
            </example>
            <para>We only set the new uniforms once. The scale of 1.0 means effectively no change.
                We define the Z to go from -1 to -3 (remember that, in our Z equation, the zNear and
                zFar are positive but refer to negative values).</para>
            <para>The location of the prism has also changed. In the original tutorial, it was
                located on the 0.75 range in Z. Because camera space has a very different Z from
                clip space, this had to change. Now, the Z location of the prism is between -1.25
                and -2.75.</para>
            <para>All of this leaves us with this result:</para>
            <figure>
                <title>Perspective Prism</title>
                <mediaobject>
                    <imageobject>
                        <imagedata fileref="PerspectivePrism.png" contentwidth="3in"/>
                    </imageobject>
                </mediaobject>
            </figure>
            <para>Now, it looks like a rectangular prism. A bright, colorful, unrealistic
                one.</para>
        </section>
        <section>
            <title>Vector Math</title>
            <para>We glossed over something in the vertex shader that bears more discussion. Namely,
                this line:</para>
            <programlisting language="glsl">clipPos.xy = cameraPos.xy * frustumScale;</programlisting>
            <para>Even if you are familiar with vector math libraries in other languages, this code
                should be rather odd. Traditional vector libraries allow you to write selectors like
                    <literal>vec.x</literal> and <literal>vec.w</literal> in order to get a specific
                field from a vector. So what does something like <literal>vec.xy</literal>
                mean?</para>
            <para>Well, it means the obvious; this expression returns a 2D vector
                (<type>vec2</type>), since there are only two components mentioned (X and Y). This
                vector will have its first component come from the X component of
                    <varname>vec</varname> and the second component come from the Y component of
                    <varname>vec</varname>. This kind of selection is called, in GLSL parlance,
                    <glossterm>swizzle selection</glossterm>. The size of the returned vector will
                be the number of components you mention, and the order of these components will
                dictate the order of the components returned.</para>
            <para>You can do any kind of swizzle operation on a vector, so long as you keep in mind
                the following rules:</para>
            <itemizedlist>
                <listitem>
                    <para>You cannot select components that are not in the source vector. So if you
                        have:</para>
                    <programlisting language="glsl">vec2 theVec;</programlisting>
                    <para>You cannot do <literal>theVec.zz</literal> because it has no Z component.</para>
                </listitem>
                <listitem>
                    <para>You cannot select more than 4 components.</para>
                </listitem>
            </itemizedlist>
            <para>These are the only rules. So you can have a <type>vec2</type> that you swizzle to
                create a <type>vec4</type> (<literal>vec.yyyx</literal>); you can repeat components;
                etc. Anything goes so long as you stick to those rules.</para>
            <para>You should also assume that swizzling is fast. This is not true of most CPU-based
                vector hardware, but since the earliest days of programmable GPUs, swizzle selection
                has been a prominent feature. In the early programmable days, swizzles caused
                    <emphasis>no</emphasis> performance loss; in all likelihood, this has not
                changed.</para>
            <para>Swizzle selection can also be used on the left side of the equals, as we have done
                here. It allows you to set specific components of a vector without changing the
                other components.</para>
            <para>When you multiply a vector by a scalar (non-vector value), it does a
                component-wise multiply, returning a vector containing the scalar multiplied by each
                of the components of the vector. We could have written the above line as
                follows:</para>
            <programlisting language="glsl">clipPos.x = cameraPos.x * frustumScale;
clipPos.y = cameraPos.y * frustumScale;</programlisting>
            <para>But it probably would not be as fast as the swizzle and vector math version.</para>
        </section>
    </section>
    <section xml:id="Tut04_matrix">
        <?dbhtml filename="Tut04 The Matrix Has You.html" ?>
        <title>The Matrix has You</title>
        <para>So, now that we can put the world into perspective, let's do it the right way. The
                <quote>needlessly overcomplicated for the time being but will make sense in a few
                tutorials</quote> way.</para>
        <para>First, let us look at the system of equations used to compute clip coordinates from
            camera space. Given that <literal>S</literal> is the frustum scale factor,
                <literal>N</literal> is the zNear and <literal>F</literal> is the zFar, we get the
            following four equations.</para>
        <equation>
            <title>Camera to Clip Equations</title>
            <mediaobject>
                <imageobject>
                    <imagedata fileref="CameraToClipPartial.svg" width="600" 
                        contentwidth="6in"/>
                </imageobject>
            </mediaobject>
        </equation>
        <para>The odd spacing is intentional. For laughs, let's add a bunch of meaningless terms
            that do not change the equation, but start to develop an interesting pattern:</para>
        <equation>
            <title>Camera to Clip Expanded Equations</title>
            <mediaobject>
                <imageobject>
                    <imagedata fileref="CameraToClipNearMatrix.svg" width="800" 
                        contentwidth="6in"/>
                </imageobject>
            </mediaobject>
        </equation>
        <para>What we have here is what is known as a linear system of equations. The equations can
            be specified as a series of coefficients (the numbers being multiplied by the XYZW
            values) which are multiplied by the input values (XYZW) to produce the single output.
            Each individual output value is a linear combination of all of the input values. In our
            case, there just happen to be a lot of zero coefficients, so the output values in this
            particular case only depend on a few input values.</para>
        <para>You may be wondering at the multiplication of the additive term of
                Z<subscript>clip</subscript>'s value by the camera space W. Well, our input camera
            space position's W coordinate is always 1. So performing the multiplication is valid, so
            long as this continues to be the case. Being able to do what we are about to do is part
            of the reason why the W coordinate exists in our camera-space position values (the perspective divide is the other).</para>
        <para>We can re-express any linear system of equations using a special kind of formulation.
            You may recognize this reformulation, depending on your knowledge of linear
            algebra:</para>
        <equation>
            <title>Camera to Clip Matrix Transformation</title>
            <mediaobject>
                <imageobject>
                    <imagedata  fileref="CameraToClipMatrix.svg" width="600"
                        contentwidth="6in"/>
                </imageobject>
            </mediaobject>
        </equation>
        <para>The two long vertical columns of XYZW labeled <quote>clip</quote> and
                <quote>camera</quote> are 4-dimensional vectors; namely the clip and camera space
            position vectors. The larger block of numbers is a matrix. You probably are not familiar
            with matrix math. If not, it will be explained presently.</para>
        <para>Generically speaking, a <glossterm>matrix</glossterm> is a two dimensional block of
            numbers (matrices with more than 2 dimensions are called <quote>tensors</quote>).
            Matrices are very common in computer graphics. Thus far, we have been able to get along
            without them. As we get into more detailed object transformations however, we will rely
            more and more on matrices to simplify matters.</para>
        <para>In graphics work, we typically use 4x4 matrices; that is, matrices with 4 columns and
            4 rows respectively. This is due to the nature of graphics work: most of the things that
            we want to use matrices for are either 3 dimensional or 3 dimensional with an extra
            coordinate of data. Our 4D positions are just 3D positions with a 1 added to the
            end.</para>
        <para>The operation depicted above is a vector-matrix multiplication. A matrix of dimension
                <literal>n</literal>x<literal>m</literal> can only be multiplied by a vector of
            dimension <literal>n</literal>. The result of such a multiplication is a vector of
            dimension <literal>m</literal>. Since our matrix in this case is 4x4, it can only be
            multiplied with a 4D vector and this multiplication will produce a 4D vector.</para>
        <para>Matrix multiplication does what the expanded equation example does. For every row in
            the matrix, the values of each component of the column are multiplied by the
            corresponding values in the rows of the vector. These values are then added together;
            that becomes the single value for the row of the output vector.</para>
        <equation>
            <title>Vector Matrix Multiplication</title>
            <mediaobject>
                <imageobject>
                    <imagedata fileref="VectorMatrixMultiplication.svg" />
                </imageobject>
            </mediaobject>
        </equation>
        <para>This results ultimately in performing 16 floating-point multiplications and 12
            floating-point additions. That's quite a lot, particularly compared with our current
            version. Fortunately, graphics hardware is designed to make these operations very fast.
            Because each of the multiplications are independent of each other, they could all be
            done simultaneously, which is exactly the kind of thing graphics hardware does fast.
            Similarly, the addition operations are partially independent; each row's summation
            does not depend on the values from any other row. Ultimately, vector-matrix
            multiplication usually generates only 4 instructions in the GPU's machine
            language.</para>
        <para>We can re-implement the above perspective projection using matrix math rather than
            explicit math. The <phrase role="propername">MatrixPerspective</phrase> tutorial does
            this.</para>
        <para>The vertex shader is much simpler in this case:</para>
        <example>
            <title>MatrixPerspective Vertex Shader</title>
            <programlisting language="glsl"><![CDATA[#version 330

layout(location = 0) in vec4 position;
layout(location = 1) in vec4 color;

smooth out vec4 theColor;

uniform vec2 offset;
uniform mat4 perspectiveMatrix;

void main()
{
    vec4 cameraPos = position + vec4(offset.x, offset.y, 0.0, 0.0);
    
    gl_Position = perspectiveMatrix * cameraPos;
    theColor = color;
}]]></programlisting>
        </example>
        <para>The OpenGL Shading Language (GLSL), being designed for graphics operations, naturally
            has matrices as basic types. The <type>mat4</type> is a 4x4 matrix (columns x rows).
            GLSL has types for all combinations of columns and rows between 2 and 4. Square matrices
            (matrices where the number of columns and rows are equal) only use one number, as in
                <type>mat4</type> above. So <type>mat3</type> is a 3x3 matrix. If the matrix is not
            square, GLSL uses notation like <type>mat2x4</type>: a matrix with 2 columns and 4
            rows.</para>
        <para>Note that the shader no longer computes the values on its own; it is
                <emphasis>given</emphasis> a matrix with all of the stored values as a uniform. This
            is simply because there is no need for it. All of the objects in a particular scene will
            be rendered with the same perspective matrix, so there is no need to waste potentially
            precious vertex shader time doing redundant computations.</para>
        <para>Vector-matrix multiplication is such a common operation in graphics that operator * is
            used to perform it. So the second line of <function>main</function> multiplies the
            perspective matrix by the camera position.</para>
        <para>Please note the <emphasis>order</emphasis> of this operation. The matrix is on the
            left and the vector is on the right. Matrix multiplication is <emphasis>not</emphasis>
            commutative, so M*v is not the same thing as v*M. Normally vectors are considered 1xN
            matrices (where N is the size of the vector). When you multiply vectors on the left of
            the matrix, GLSL considers it an Nx1 matrix; this is the only way to make the
            multiplication make sense. This will multiply the single row of the vector with each
            column, summing the results, creating a new vector. This is <emphasis>not</emphasis>
            what we want to do. We want to multiply rows of the matrix by the vector, not columns of
            the matrix. Put the vector on the right, not the left.</para>
        <para>The program initialization routine has a few changes:</para>
        <example>
            <title>Program Initialization of Perspective Matrix</title>
            <programlisting language="cpp"><![CDATA[offsetUniform = glGetUniformLocation(theProgram, "offset");

perspectiveMatrixUnif = glGetUniformLocation(theProgram, "perspectiveMatrix");

float fFrustumScale = 1.0f; float fzNear = 0.5f; float fzFar = 3.0f;

float theMatrix[16];
memset(theMatrix, 0, sizeof(float) * 16);

theMatrix[0] = fFrustumScale;
theMatrix[5] = fFrustumScale;
theMatrix[10] = (fzFar + fzNear) / (fzNear - fzFar);
theMatrix[14] = (2 * fzFar * fzNear) / (fzNear - fzFar);
theMatrix[11] = -1.0f;

glUseProgram(theProgram);
glUniformMatrix4fv(perspectiveMatrixUnif, 1, GL_FALSE, theMatrix);
glUseProgram(0);]]></programlisting>
        </example>
        <para>A 4x4 matrix contains 16 values. So we start by creating an array of 16 floating-point
            numbers called <varname>theMatrix</varname>. Since most of the values are zero, we can
            just set the whole thing to zero. This works because IEEE 32-bit floating-point numbers
            represent a zero as 4 bytes that all contain zero.</para>
        <para>The next few functions set the particular values of interest into the matrix. Before
            we can understand what's going on here, we need to talk a bit about ordering.</para>
        <para>A 4x4 matrix is technically 16 values, so a 16-entry array can store a matrix. But
            there are two ways to store a matrix as an array. One way is called
                <glossterm>column-major</glossterm> order, the other naturally is
                <glossterm>row-major</glossterm> order. Column-major order means that, for an NxM
            matrix (columns x rows), the first N values in the array are the first column
            (top-to-bottom), the next N values are the second column, and so forth. In row-major
            order, the first M values in the array are the first row (left-to-right), followed by
            another M values for the second row, and so forth.</para>
        <para>In this example, the matrix is stored in column-major order. So array index 14 is in
            the third row of the fourth column.</para>
        <para>The entire matrix is a single uniform. To transfer the matrix to OpenGL, we use the
                <function>glUniformMatrix4fv</function> function. The first parameter is the uniform
            location that we are uploading to. This function can be used to transfer an entire array
            of matrices (yes, uniform arrays of any type are possible), so the second parameter is
            the number of array entries. Since we're only providing one matrix, this value is
            1.</para>
        <para>The third parameter tells OpenGL what the ordering of the matrix data is. If it is
                <literal>GL_TRUE</literal>, then the matrix data is in row-major order. Since our
            data is column-major, we set it to <literal>GL_FALSE</literal>. The last parameter is
            the matrix data itself.</para>
        <para>Running this program will give us:</para>
        <figure>
            <title>Perspective Matrix</title>
            <mediaobject>
                <imageobject>
                    <imagedata fileref="MatrixPerspective.png" contentwidth="3in"/>
                </imageobject>
            </mediaobject>
        </figure>
        <para>The same thing we had before. Only now done with matrices.</para>
    </section>
    <section>
        <?dbhtml filename="Tut04 Aspect of the World.html" ?>
        <title>Aspect of the World</title>
        <para>If you run the last program, and resize the window, the viewport resizes with it.
            Unfortunately, this also means that what was once a rectangular prism with a square
            front becomes elongated.</para>
        <figure>
            <title>Bad Aspect Ratio</title>
            <mediaobject>
                <imageobject>
                    <imagedata fileref="MatrixPerspectiveSkew.png" contentwidth="5in"/>
                </imageobject>
            </mediaobject>
        </figure>
        <para>This is a problem of <glossterm>aspect ratio</glossterm>, the ratio of an image's
            width to its height. Currently, when you change the window's dimensions, the code calls
                <function>glViewport</function> to tell OpenGL the new size. This changes OpenGL's
            viewport transform, which goes from normalized device coordinates to window coordinates.
            NDC space always has a 1:1 aspect ratio; it is 2 units wide and 2 units tall. As long
            as window coordinates also have a 1:1 width-to-height ratio, objects that appear square
            in NDC space will still be square in window space. Once window space became non-1:1,
            the viewport transform stretched squares in NDC space into non-square shapes.</para>
        <para>What exactly can be done about this? Well, that depends on what you intend to
            accomplish by making the window bigger.</para>
        <para>One simple way to do this is to prevent the viewport from ever becoming non-square.
            This can be done easily enough by changing the <function>reshape</function> function to
            be this:</para>
        <example>
            <title>Square-only Viewport</title>
            <programlisting language="cpp"><![CDATA[void reshape (int w, int h)
{
    if(w < h)
        glViewport(0, 0, (GLsizei) w, (GLsizei) w);
    else
        glViewport(0, 0, (GLsizei) h, (GLsizei) h);
}]]></programlisting>
        </example>
        <para>Now if you resize the window, the viewport will always remain a square. However, if
            the window is non-square, there will be a lot of empty space either to the right or
            below the viewport area. This space cannot be rendered into with triangle drawing
            commands (for reasons that we will see in the next tutorial).</para>
        <para>This solution has the virtue of keeping the viewable region of the world fixed,
            regardless of the shape of the viewport. It has the disadvantage of wasting window
            space.</para>
        <para>What do we do if we want to use as much of the window as possible? There is a way to
            do this.</para>
        <para>Go back to the definition of the problem. NDC space is a [-1, 1] cube. If an object in
            NDC space is a square, in order for it to be a square in window coordinates, the
            viewport must also be a square. Conversely, if you want non-square window coordinates,
            the object in NDC space <emphasis>must not be a square.</emphasis></para>
        <para>So our problem is with the implicit assumption that squares in camera space need to
            remain squares throughout. This is not the case. To do what we want, we need to
            transform things into clip space such that they have the correct non-square shape so
            that, once the perspective divide and viewport transform convert them into window
            coordinates, they are again square.</para>
        <para>Currently, our perspective matrix defines a square-shaped frustum. That is, the top
            and bottom of the frustum (if it were visualized in camera space) would be squares. What
            we need to do instead is create a rectangular frustum.</para>
        <figure>
            <title>Widescreen Aspect Ratio Frustum</title>
            <mediaobject>
                <imageobject>
                    <imagedata fileref="RectViewFrustum.svg"  contentwidth="5in"/>
                </imageobject>
            </mediaobject>
        </figure>
        <para>We already have some control over the shape of the frustum. We said originally that we
            did not need to move the eye position from the origin because we could simply scale the
            X and Y positions of everything to achieve a similar effect. When we do this, we scale
            the X and Y by the same value; this produces a uniform scale. It also produces a square
            frustum, as seen in camera space. Since we want a rectangular frustum, we need to use a
            non-uniform scale, where the X and Y positions are scaled by different values.</para>
        <para>What this will do is show <emphasis>more</emphasis> of the world. But in what
            direction do we want to show more? Human vision tends to be more horizontal than
            vertical. This is why movies tend to use a minimum of 16:9 width:height aspect ratio
            (most use more width than that). So it is usually the case that you design a view for a
            particular height, then adjust the width based on the aspect ratio.</para>
        <para>This is done in the <phrase role="propername">AspectRatio</phrase> tutorial. This code
            uses the same shaders as before; it simply modifies the perspective matrix in the
                <function>reshape</function> function.</para>
        <example>
            <title>Reshape with Aspect Ratio</title>
            <programlisting language="cpp"><![CDATA[void reshape (int w, int h)
{
    perspectiveMatrix[0] = fFrustumScale * (h / (float)w);
    perspectiveMatrix[5] = fFrustumScale;
    
    glUseProgram(theProgram);
    glUniformMatrix4fv(perspectiveMatrixUnif, 1, GL_FALSE, perspectiveMatrix);
    glUseProgram(0);
    
    glViewport(0, 0, (GLsizei) w, (GLsizei) h);
}
]]></programlisting>
        </example>
        <para>The matrix, now a global variable called <varname>perspectiveMatrix</varname>, gets
            its other fields from the program initialization function just as before. The aspect
            ratio code is only interested in the XY scale values.</para>
        <para>Here, we change the X scaling based on the ratio of height to width. The Y scaling is
            left alone.</para>
        <para>Also, the offset used for positioning the prism was changed from (0.5, 0.5) to (1.5,
            0.5). This means that part of the object is off the side of the viewport until you
            resize the window. Changing the width shows more of the area; only by changing the
            height do you actually make the objects bigger. The square always looks like a
            square.</para>
    </section>
    <section>
        <?dbhtml filename="Tut04 In Review.html" ?>
        <title>In Review</title>
        <para>In this tutorial, you have learned about the following:</para>
        <itemizedlist>
            <listitem>
                <para>Face culling can cause triangles to be culled (not rendered) based on the
                    order of the vertices in window space.</para>
            </listitem>
            <listitem>
                <para>Perspective projections are used to give a scene the appearance of depth,
                    where objects farther away appear smaller and offset compared to near ones.
                    OpenGL hardware has special provisions for perspective projections; namely,
                    the division by W that transforms clip space into NDC space.</para>
            </listitem>
            <listitem>
                <para>The perspective transformation can be performed as a matrix multiplication
                    operation. Matrix/vector multiplication is a way to compute multiple linear
                    equations in a single operation.</para>
            </listitem>
            <listitem>
                <para>The proper aspect ratio for a display image can be maintained by scaling the X
                    and Y coordinates of camera-space vertices based on the window's aspect ratio.
                    This transformation can be folded into the perspective projection matrix.</para>
            </listitem>
        </itemizedlist>
        <section>
            <title>Further Study</title>
            <para>Try doing these things with the given programs.</para>
            <itemizedlist>
                <listitem>
                    <para>In all of the perspective tutorials, we only ever had a frustum scale of
                        1.0. Adjust the frustum scale and see how it affects the scene.</para>
                </listitem>
                <listitem>
                    <para>Adjust the zNear distance, so that it intersects with the prism. See how
                        this affects the rendering. Adjust the zFar distance similarly and see what
                        happens.</para>
                </listitem>
                <listitem>
                    <para>We made some simplifying assumptions in our perspective transformation
                        algorithm. In particular, we fixed the eye point at (0, 0, 0) and the plane
                        at (0, 0, 1). However, this was not strictly necessary; we could have
                        altered our perspective transform algorithm to use a variable eye point.
                        Adjust the <phrase role="propername">ShaderPerspective</phrase> to implement
                        an arbitrary perspective plane location (the size remains fixed at [-1, 1]).
                        You will need to offset the X, Y camera-space positions of the vertices by
                            E<subscript>x</subscript> and E<subscript>y</subscript> respectively,
                        but only <emphasis>after</emphasis> the scaling (for aspect ratio). And you
                        will need to divide the camera-space Z term by -E<subscript>z</subscript>
                        instead of just -1.</para>
                </listitem>
                <listitem>
                    <para>Do the above, but in matrix form. Remember that any terms placed in the
                        fourth column will be added to that component, due to the multiplication by
                            W<subscript>camera</subscript> (which is always 1.0).</para>
                </listitem>
            </itemizedlist>
        </section>
        <section>
            <title>OpenGL Functions of Note</title>
            <glosslist>
                <glossentry>
                    <glossterm>glEnable/glDisable</glossterm>
                    <glossdef>
                        <para>These functions activate or inactivate certain features of OpenGL.
                            There is a large list of possible features that can be enabled or
                            disabled.</para>
                    </glossdef>
                </glossentry>
                <glossentry>
                    <glossterm>glCullFace/glFrontFace</glossterm>
                    <glossdef>
                        <para>These two functions control how face culling works.
                                <function>glFrontFace</function> defines which triangle winding
                            order is considered the front. <function>glCullFace</function> defines
                            what face gets culled. This function can also cull
                                <emphasis>all</emphasis> faces, though this is not useful if you
                            want to get rendering done.</para>
                        <para>These functions only do something useful if
                                <literal>GL_CULL_FACE</literal> is currently enabled. They still set
                            the values internally even if <literal>GL_CULL_FACE</literal> is not
                            enabled, so enabling it later will use the up-to-date settings.</para>
                    </glossdef>
                </glossentry>
            </glosslist>
        </section>
    </section>
    <section xml:id="Tut04_Glossary">
        <?dbhtml filename="Tut04 Glossary.html" ?>
        <title>Glossary</title>
        <glosslist>
            <glossentry>
                <glossterm>face culling</glossterm>
                <glossdef>
                    <para>The ability to cull triangles based on the winding order of the triangle.
                        This functionality is activated in OpenGL by using
                            <function>glEnable</function> with <literal>GL_CULL_FACE</literal>.
                        Which faces get culled is determined by the <function>glCullFace</function>
                        and <function>glFrontFace</function> functions.</para>
                </glossdef>
            </glossentry>
            <glossentry>
                <glossterm>winding order</glossterm>
                <glossdef>
                    <para>The order, clockwise or counter-clockwise, that the 3 vertices that make
                        up a triangle are received in. This is measured in window coordinates,
                        two-dimensionally.</para>
                </glossdef>
            </glossentry>
            <glossentry>
                <glossterm>projection</glossterm>
                <glossdef>
                    <para>The act of taking a series of objects in a higher dimension and presenting
                        those objects in a lower dimension. The act of rendering a 3D scene to a 2D
                        image requires projecting that scene from three dimensions into two
                        dimensions.</para>
                    <para>Projection always happens relative to a surface of projection. Projecting
                        2D space onto a 1D space requires a finite line to be projected on.
                        Projecting 3D space onto 2D space requires a plane of projection. This
                        surface is defined in the higher dimension's world.</para>
                </glossdef>
            </glossentry>
            <glossentry>
                <glossterm>orthographic projection</glossterm>
                <glossdef>
                    <para>A form of projection that simply negates all offsets in the direction
                        perpendicular to the surface of projection. When doing a 3D to 2D
                        orthographic projection, if the plane is axis aligned, then the projection
                        can be done simply. The coordinate that is perpendicular to the plane of
                        projection is simply discarded. If the plane is not axis aligned, then the
                        math is more complex, but it has the same effect.</para>
                    <para>Orthographic projections are uniform in the direction of the projection.
                        Because of the uniformity, lines that are parallel in the higher dimension
                        space are guaranteed to remain parallel in the lower dimension space.</para>
                </glossdef>
            </glossentry>
            <glossentry>
                <glossterm>perspective projection</glossterm>
                <glossdef>
                    <para>A form of projection that projects onto the surface based on a position,
                        the eye position. Perspective projections attempt to emulate a pin-hole
                        camera model, which is similar to how human eyes see. The positions of
                        objects in space are projected onto the surface of projection radially based
                        on the eye position.</para>
                    <para>Parallel lines in the higher dimension are <emphasis>not</emphasis>
                        guaranteed to remain parallel in the lower dimension. They might, but they
                        might not.</para>
                </glossdef>
            </glossentry>
            <glossentry>
                <glossterm>frustum</glossterm>
                <glossdef>
                    <para>Geometrically, a frustum is a 3D shape; a pyramid that has the top chopped
                        off. The view of a 3D to 2D perspective projection, from the eye through the
                        plane of projection, has the shape of a frustum.</para>
                </glossdef>
            </glossentry>
            <glossentry>
                <glossterm>perspective divide</glossterm>
                <glossdef>
                    <para>A new name for the transformation from clip space to normalized device
                        coordinate space. This is so called because the division by W is what allows
                        perspective projection to work using only matrix math; a matrix alone would
                        not otherwise be able to perform the full perspective projection
                        operation.</para>
                </glossdef>
            </glossentry>
            <glossentry>
                <glossterm>camera space</glossterm>
                <glossdef>
                    <para>An arbitrarily defined, but highly useful, space from which the
                        perspective projection can be performed relatively easily. Camera space is
                        an infinitely large space, with positive X going right, positive Y going up,
                        and positive Z coming towards the viewer.</para>
                    <para>In camera space, the eye position of the perspective projection is assumed
                        to be at (0, 0, 1), and the plane of projection is a [-1, 1] plane in X and
                        Y, which passes through the 3D origin. Thus, all points that have a positive
                        Z are considered to be behind the camera and thus out of view.</para>
                </glossdef>
            </glossentry>
            <glossentry>
                <glossterm>camera zNear, camera zFar</glossterm>
                <glossdef>
                    <para>Normalized device coordinate (NDC) space is bounded in all dimensions on
                        the range [-1, 1]. Camera space is unbounded, but the perspective transform
                        implicitly bounds what is considered in view to [-1, 1] in the X and Y axis.
                        This leaves the Z axis unbounded, which NDC space does not allow.</para>
                    <para>The camera zNear and zFar values are numbers that define the minimum and
                        maximum extent of Z in the perspective projection transform. These values
                        are positive values, though they represent negative values in camera space.
                        Using the standard perspective transform, both values must be greater than
                        0, and zNear must be less than zFar.</para>
                </glossdef>
            </glossentry>
            <glossentry>
                <glossterm>swizzle selection</glossterm>
                <glossdef>
                    <para>Swizzle selection is a vector technique, unique to shading languages, that
                        allows you to take a vector and arbitrarily build other vectors from the
                        components. This selection is completely arbitrary; you can build a vec4
                        from a vec2, or any other combination you wish, up to 4 elements.</para>
                    <para>Swizzle selections use combinations of <quote>x,</quote>
                        <quote>y,</quote>
                        <quote>z,</quote> and <quote>w</quote> to pick components out of the input
                        vector. Swizzle operations look like this:</para>
                    <programlisting language="glsl">vec2 firstVec;
vec4 secondVec = firstVec.xyxx;
vec3 thirdVec = secondVec.wzy;</programlisting>
                    <para>Swizzle selection is, in graphics hardware, considered an operation so
                        fast as to be instantaneous. That is, graphics hardware is built with
                        swizzle selection in mind.</para>
                </glossdef>
            </glossentry>
            <glossentry>
                <glossterm>matrix</glossterm>
                <glossdef>
                    <para>A two-dimensional arrangement of numbers. Like vectors, matrices can be
                        considered a single element. Matrices are often used to represent the
                        coefficients in a system of linear equations; because of this (among other
                        things), matrix math is often called linear algebra.</para>
                    <para>The size of a matrix, the number of columns and rows (denoted as NxM,
                        where N is the number of columns and M is the number of rows) determines the
                        kind of matrix. Matrix arithmetic has specific requirements on the two
                        matrices involved, depending on the arithmetic operation. Multiplying two
                        matrices together can only be performed if the number of columns in the
                        matrix on the left is equal to the number of rows in the matrix on the
                        right. For this reason, among others, matrix multiplication is not
                        commutative (A*B is not B*A; sometimes B*A is not even possible).</para>
                    <para>4x4 matrices are used in computer graphics to transform 3 or 4-dimensional
                        vectors from one space to another. Most kinds of linear transforms can be
                        represented with 4x4 matrices.</para>
                </glossdef>
            </glossentry>
            <glossentry>
                <glossterm>column-major, row-major</glossterm>
                <glossdef>
                    <para>These terms define the two ways in which a matrix can be stored as an
                        array of values. Column-major order means that, for an NxM matrix (columns x
                        rows), the first N values in the array are the first column (top-to-bottom),
                        the next N values are the second column, and so forth. In row-major order,
                        the first M values in the array are the first row (left-to-right), followed
                        by another M values for the second row, and so forth.</para>
                </glossdef>
            </glossentry>
        </glosslist>
    </section>
</chapter>