\begin{thebibliography}{} \bibitem[Abramowicz et~al., 1997]{nips-9:Abramowicz+Horn+Naftaly:1997} Abramowicz, H., Horn, D., Naftaly, U., and Sahar-Pikielny, C. (1997). \newblock An orientation selective neural network for pattern identification in particle detectors. \newblock In Mozer, M.~C., Jordan, M.~I., and Petsche, T., editors, {\em Advances in Neural Information Processing Systems}, volume~9, page 925. The {MIT} Press. \bibitem[Ambroise and Govaert, 1996]{Ambroise} Ambroise, C. and Govaert, G. (1996). \newblock Constrained clustering and {K}ohonen self-organizing maps. \newblock {\em Journal of Classification}, 13(2):299--313. \bibitem[Anouar et~al., 1998]{Anouar98} Anouar, F., Badran, F., and Thiria, S. (1998). \newblock Probabilistic self organizing map and radial basis function. \newblock {\em Journal of Neurocomputing}, 20:83--96. \bibitem[Armstrong, 1998]{Goestat98} Armstrong, M.~A. (1998). \newblock {\em Basic Linear Geostatistics}. \newblock Springer-Verlag, New York. \bibitem[Atteia and Gaches, 1999]{atteiaGaches99} Atteia, M. and Gaches, J. (1999). \newblock {\em Approximation Hilbertienne}. \newblock Presses Universitaires de Grenoble. \bibitem[Barron, 1994]{Barron94} Barron, A.~R. (1994). \newblock Approximation and estimation bounds for artificial neural networks. \newblock {\em Machine Learning}, 14:115--133. \bibitem[Bartlett et~al., 2000]{bouchero00} Bartlett, P., Boucheron, S., and Lugosi, G. (2000). \newblock Model selection and error estimation. \newblock Accepted for publication in {\em Machine Learning}. \bibitem[Bartlett, 1998]{bartlett98sample} Bartlett, P.~L. (1998). \newblock The sample complexity of pattern classification with neural networks: the size of the weights is more important than the size of the network. \newblock {\em IEEE Trans. Inf. Theory}, 44(2):525--536. \bibitem[Bennett and Mangasarian, 1990]{RLP92} Bennett, K. and Mangasarian, O. (1990). \newblock Neural networks training via linear programming.
\newblock In Pardalos, P., editor, {\em Advances in Optimization and Parallel Computing}, pages 56--67. North Holland. \bibitem[Bertels et~al., 2001]{Bertels01} Bertels, K., Neuberg, L., Vassiliadis, S., and Pechanek, D. (2001). \newblock On chaos and neural networks: The backpropagation algorithm. \newblock {\em Artificial Intelligence Review}, 15:165--187. \bibitem[Birg{\'e} and Massart, 1997]{BirgeMassart97} Birg{\'e}, L. and Massart, P. (1997). \newblock From model selection to adaptive estimation. \newblock In Pollard, D., Torgersen, E., and Yang, G., editors, {\em Festschrift for Lucien Le~Cam: Research papers in Probability and Statistics}, pages 55--87. \bibitem[Bishop, 1995]{bishop95} Bishop, C.~M. (1995). \newblock {\em Neural Networks for Pattern Recognition}. \newblock Clarendon Press, Oxford. \bibitem[Botha et~al., 1996]{nn:Botha+Barnard+Barnard:1996} Botha, E.~C., Barnard, E., and Barnard, C.~J. (1996). \newblock Feature-based classification of aerospace radar targets using neural networks. \newblock {\em Neural Networks}, 9(1):129--142. \bibitem[Bradley et~al., 1999]{DM98} Bradley, P., Fayyad, U., and Mangasarian, O. (1999). \newblock Mathematical programming for data mining: formulations and challenges. \newblock {\em Journal of computing (special issue on data mining)}. \bibitem[Bridle, 1990]{BridleNIPS290} Bridle, J.~S. (1990). \newblock Training stochastic model recognition algorithms as networks can lead to maximum mutual information estimation of parameters. \newblock In Touretzky, D., editor, {\em Advances in Neural Information Processing Systems, NIPS'89}, volume~2, pages 211--217. Morgan-Kaufmann. \bibitem[Burgess, 1997]{BurgessNips997} Burgess, A.~N. (1997). \newblock Estimating equivalent kernels for neural networks: a data perturbation approach. \newblock In Mozer, M.~C., Jordan, M.~I., and Petsche, T., editors, {\em Advances in Neural Information Processing Systems}, volume~9, page 382. The {MIT} Press.
\bibitem[Campbell, 1997]{campbell97constructive} Campbell, C. (1997). \newblock Constructive learning techniques for designing neural network systems. \newblock In Leondes, C., editor, {\em Neural Network Systems Technologies and Applications}. Academic Press. \bibitem[Cesa-Bianchi and Lugosi, 1999]{prediction99} Cesa-Bianchi, N. and Lugosi, G. (1999). \newblock On prediction of individual sequences. \newblock {\em Annals of Statistics}, 27(6). \bibitem[Chapelle et~al., 2001]{Chapelle01} Chapelle, O., Weston, J., Bottou, L., and Vapnik, V. (2001). \newblock Vicinal risk minimization. \newblock In Leen, T.~K., Dietterich, T.~G., and Tresp, V., editors, {\em Advances in Neural Information Processing Systems 13}, pages 416--422. MIT Press. \bibitem[Debnath and Mikusinski, 1998]{gateauDeriv} Debnath, L. and Mikusinski, P. (1998). \newblock {\em Introduction to Hilbert Spaces with Applications}. \newblock Academic Press. \bibitem[Devroye et~al., 1996]{devroye96probabilistic} Devroye, L., Gy{\"o}rfi, L., and Lugosi, G. (1996). \newblock {\em A Probabilistic Theory of Pattern Recognition}. \newblock Springer, New York. \bibitem[Dontchev and Zolezzi, 1993]{DontZol93} Dontchev, A. and Zolezzi, T. (1993). \newblock {\em Well posed optimization problems}. \newblock Springer-Verlag. \newblock Lecture notes in Mathematics 1543. \bibitem[Duda et~al., 2001]{nn-books:r.+stork:2001} Duda, R.~O., Hart, P.~E., and Stork, D.~G. (2001). \newblock {\em Pattern Classification (2nd ed.)}. \newblock John Wiley and Sons. \newblock ISBN: 0-471-05669-3. \bibitem[Evgeniou et~al., 2000]{EvgPonPog00} Evgeniou, T., Pontil, M., and Poggio, T. (2000). \newblock Regularization networks and support vector machines. \newblock {\em Advances in Computational Mathematics}. \bibitem[{Fogelman Soulié}, 1997]{Francoise97} {Fogelman Soulié}, F. (1997). \newblock Réseaux de neurones et statistiques. Une introduction. \newblock In {\em Statistique et méthodes neuronales}. Dunod.
\bibitem[Freund and Schapire, 1999]{freund99short} Freund, Y. and Schapire, R. (1999). \newblock A short introduction to boosting. \bibitem[Friedman, 1997]{Friedman97} Friedman, J.~H. (1997). \newblock On bias, variance, 0/1 loss, and the curse of dimensionality. \newblock {\em Data Mining and Knowledge Discovery}, 1(1):55--77. \bibitem[Friedman and Stuetzle, 1981]{PPFridman81} Friedman, J.~H. and Stuetzle, W. (1981). \newblock Projection pursuit regression. \newblock {\em Journal of the American Statistical Association}, 76:817--823. \bibitem[Frie{\ss} et~al., 1998]{FriCriCam98} Frie{\ss}, T.-T., Cristianini, N., and Campbell, C. (1998). \newblock The kernel adatron algorithm: {A} fast and simple learning procedure for support vector machines. \newblock In {\em 15th Intl.\ Conf.\ Machine Learning}. Morgan Kaufmann Publishers. \bibitem[Gallinari, 1997]{PatrickGallin97} Gallinari, P. (1997). \newblock Méthodes neuronales et discrimination. \newblock In {\em Statistique et méthodes neuronales}. Dunod. \bibitem[Gallinari et~al., 1999]{gallinari} Gallinari, P., Zaragoza, H., and Amini, M.-R. (1999). \newblock Apprentissage et données textuelles. \newblock Ecole Modulad - SFdS (Modulad'99). \bibitem[Ghosh and Nag, 2000]{RBFreviewGhosh90} Ghosh, J. and Nag, A. (2000). \newblock Radial basis function networks. \newblock In Howlett, R.~J. and Jain, L.~C., editors, {\em Radial Basis Function Neural Network Theory and Applications}. Physica-Verlag. \bibitem[Grandvalet et~al., 1997]{Grandvalet97} Grandvalet, Y., Canu, S., and Boucheron, S. (1997). \newblock Noise injection: Theoretical prospects. \newblock {\em Neural Computation}, 9(5):1093--1108. \bibitem[Hastie and Tibshirani, 1990]{Hastie} Hastie, T. and Tibshirani, R. (1990). \newblock {\em Generalized Additive Models}. \newblock Chapman and Hall. \newblock Monographs on statistics and applied probability, 43. \bibitem[Haykin, 1994]{nn-books:Haykin:1994} Haykin, S. (1994). \newblock {\em Neural Networks.
{A} Comprehensive Foundation}. \newblock Macmillan College Publishing, New York. \bibitem[Hertz et~al., 1991]{hertz91} Hertz, J., Krogh, A., and Palmer, R.~G. (1991). \newblock {\em An introduction to the theory of Neural Computation}. \newblock Addison-Wesley. \bibitem[Hintz-Madsen et~al., 1998]{hintzmadsen98neural} Hintz-Madsen, M., Hansen, L.~K., Larsen, J., Pedersen, M.~W., and Larsen, M. (1998). \newblock Neural classifier construction using regularization, pruning and test error estimation. \newblock {\em Neural Networks}, 11(9):1659--1670. \bibitem[Holmstr{\"o}m et~al., 1997]{Holmstom97} Holmstr{\"o}m, L., Koistinen, P., Laaksonen, J., and Oja, E. (1997). \newblock Neural and statistical classifiers--taxonomy and two case studies. \newblock {\em IEEE Transactions on Neural Networks}, 8(1):5--17. \newblock {Jain, Alan, Guest editor}. \bibitem[Hosseini, 2000]{Hosseini00} Hosseini, S. (2000). \newblock {\em Contribution à la régression non linéaire par les réseaux de neurones}. \newblock PhD thesis, Institut National Polytechnique de Grenoble. \bibitem[Jain, 1997]{IEEENNPR} Jain, A. (1997). \newblock Special issue on neural network and pattern recognition. \newblock {\em {IEEE} Transactions on Neural Networks}, 8(1). \bibitem[Juditsky et~al., 1995]{BBmodelTheorie95} Juditsky, A., Hjalmarsson, H., Benveniste, A., Delyon, B., Ljung, L., Sj{\"o}berg, J., and Zhang, Q. (1995). \newblock Nonlinear black-box models in system identification: Mathematical foundations. \newblock {\em Automatica}, 31(12):1725--1750. \bibitem[Kearns et~al., 1997]{MCcompareML97} Kearns, M., Mansour, Y., Ng, A.~Y., and Ron, D. (1997). \newblock An experimental and theoretical comparison of model selection methods. \newblock {\em Machine Learning}, 27:7--50. \bibitem[Kittler, 1986]{Kittler86} Kittler, J. (1986). \newblock Feature selection and extraction. \newblock In {\em Handbook of Pattern Recognition and Image Processing}. Academic Press. \bibitem[Krauth and Mezard, 1987]{KrauthMezard87} Krauth, W.
and Mezard, M. (1987). \newblock Learning algorithm with optimal stability in neural networks. \newblock {\em Journal of Physics A}, 20:745--752. \bibitem[Lampinen and Vehtari, 2001]{nn:lampinen+vehtari:2001} Lampinen, J. and Vehtari, A. (2001). \newblock Bayesian approach for neural networks -- review and case studies. \newblock {\em Neural Networks}, 14(3):257--274. \bibitem[LeCun et~al., 1998]{effBackProp98} LeCun, Y., Bottou, L., Orr, G.~B., and M{\"u}ller, K.-R. (1998). \newblock Efficient backprop. \newblock In Orr, G.~B. and M{\"u}ller, K.-R., editors, {\em Neural Networks: Tricks of the Trade}, pages 5--50. Springer-Verlag, Berlin. \newblock Springer Lecture Notes in Computer Sciences 1524. \bibitem[Leray and Gallinari, 2001]{Leray01} Leray, P. and Gallinari, P. (2001). \newblock De l'utilisation d'{{\em OBD}} pour la s{\'e}lection de variables dans les perceptrons multi-couches. \newblock {\em Revue d'Intelligence Artificielle}. \newblock A paraître. \bibitem[Lodhi et~al., 2001]{LodShaCriWat01} Lodhi, H., Shawe-Taylor, J., Cristianini, N., and Watkins, C. (2001). \newblock Text classification using string kernels. \newblock In Leen, T.~K., Dietterich, T.~G., and Tresp, V., editors, {\em Advances in Neural Information Processing Systems 13}, pages 563--569. MIT Press. \bibitem[Makovoz, 1996]{Makovoz96} Makovoz, Y. (1996). \newblock Random approximants and neural networks. \newblock {\em Journal of Approximation Theory}, 85:98--109. \bibitem[Mangasarian and Musicant, 2001]{ManMus01} Mangasarian, O.~L. and Musicant, D.~R. (2001). \newblock Active support vector machine classification. \newblock In Leen, T.~K., Dietterich, T.~G., and Tresp, V., editors, {\em Advances in Neural Information Processing Systems 13}, pages 577--583. MIT Press. \bibitem[Mangasarian, 1993]{Mang93} Mangasarian, O. (1993). \newblock Misclassification minimisation. \newblock {\em Journal of Global Optimization}, pages 309--323. \bibitem[Masson and Linster, 1996]{Masson96} Masson, C.
and Linster, C. (1996). \newblock Towards a cognitive understanding of odor discrimination: combining experimental and theoretical approaches. \newblock {\em Behavioural Processes}, 35:63--82. \bibitem[Milgram, 1993]{milgram93} Milgram, M. (1993). \newblock {\em Reconnaissance des formes : Méthodes numériques et connexionnistes}. \newblock Armand Colin, Paris. \bibitem[Murata et~al., 1994]{NIC94} Murata, N., Yoshizawa, S., and Amari, S. (1994). \newblock Network information criterion -- determining the number of hidden units for an artificial neural network model. \newblock {\em IEEE Transactions on Neural Networks}, 5(6):865--872. \bibitem[Musavi et~al., 1998]{nn:Musavi+Bryant+Qiao:1998} Musavi, M., Bryant, R., Qiao, M., Davisson, M., Akeson, E., and French, B. (1998). \newblock Mouse chromosome classification by radial basis function network with fast orthogonal search. \newblock {\em Neural Networks}, 11(4):769--777. \bibitem[Nadal, 1993]{Nadal93} Nadal, J. (1993). \newblock {\em Réseaux de neurones : de la physique à la physiologie}. \newblock Armand Colin, Paris. \bibitem[Orr and M{\"u}ller, 1998]{OrrMuller98} Orr, G.~B. and M{\"u}ller, K.-R. (1998). \newblock {\em Neural Networks: Tricks of the Trade}. \newblock Springer-Verlag, Berlin. \bibitem[Pinkus, 1999]{pinkus99approximation} Pinkus, A. (1999). \newblock Approximation theory of the {MLP} model in neural networks. \newblock {\em Acta Numerica}, 8:143--195. \bibitem[Poggio and Girosi, 1990]{PoggioGirosi90} Poggio, T. and Girosi, F. (1990). \newblock Networks for approximation and learning. \newblock {\em Proceedings of the IEEE (special issue: Neural Networks I: Theory and Modeling)}, 78(9):1481--1497. \bibitem[Portnoy and Koenker, 1997]{tortoiseHare97} Portnoy, S. and Koenker, R. (1997). \newblock The {G}aussian hare and the {L}aplacian tortoise: Computability of squared-error versus absolute error estimates. \newblock {\em Statistical Science}, 12(4):279--300.
\bibitem[Prechelt, 1998]{earlystopping99} Prechelt, L. (1998). \newblock Early stopping -- but when? \newblock In Orr, G.~B. and M{\"u}ller, K.-R., editors, {\em Neural Networks: Tricks of the Trade}, volume 1524, pages 57--69. Springer-Verlag. \bibitem[Reed, 1993]{pruningRevies93} Reed, R. (1993). \newblock Pruning algorithms -- a survey. \newblock {\em IEEE Transactions on Neural Networks}, 4(5). \bibitem[Richard and Lengelle, 2001]{RichardLengelle} Richard, C. and Lengelle, R. (2001). \newblock Apprentissage de règles de décision à structure imposée et contrôle de la complexité. \bibitem[Ripley, 1996]{Ripley96} Ripley, B.~D. (1996). \newblock {\em Pattern recognition and neural networks}. \newblock Cambridge University Press, Cambridge. \bibitem[{Sch\"olkopf} et~al., 1999]{SchBurSmo99} {Sch\"olkopf}, B., Burges, C.~J.~C., and Smola, A.~J. (1999). \newblock {\em Advances in Kernel Methods --- Support Vector Learning}. \newblock MIT Press, Cambridge, MA. \bibitem[Sigurdsson et~al., 2000]{revueRgularization00} Sigurdsson, S., Larsen, J., and Hansen, L. (2000). \newblock On comparison of adaptive regularization methods. \newblock In Widrow, B., Guan, L., Paliwal, K., Adali, T., Larsen, J., Wilson, E., and Douglas, S., editors, {\em Proceedings of the IEEE Workshop on Neural Networks for Signal Processing X}, pages 221--230. IEEE. Sydney. \bibitem[Smola et~al., 2000]{Smolla2000} Smola, A.~J., Bartlett, P.~L., Sch{\"o}lkopf, B., and Schuurmans, D., editors (2000). \newblock {\em Advances in Large Margin Classifiers}. \newblock MIT Press. \bibitem[Sontag, 1998]{SontagCVdimNN98} Sontag, E.~D. (1998). \newblock {VC} dimension of neural networks. \newblock In Bishop, C., editor, {\em Neural Networks and Machine Learning}. Springer-Verlag. \bibitem[Specht, 1990]{nn:specht90} Specht, D.~F. (1990). \newblock Probabilistic neural networks. \newblock {\em Neural Networks}, 3(1):109--118. \bibitem[Vapnik, 1995]{nn-books:Vapnik:1995} Vapnik, V.~N. (1995).
\newblock {\em The nature of statistical learning theory}. \newblock Springer, New York. \bibitem[Vapnik, 1999]{nn-books:Vapnik:1999} Vapnik, V.~N. (1999). \newblock {\em Statistical learning theory}. \newblock Wiley-Interscience. \bibitem[Waterhouse and Cook, 1997]{nips-9:Waterhouse+Cook:1997} Waterhouse, S. and Cook, G. (1997). \newblock Ensemble methods for phoneme classification. \newblock In Mozer, M.~C., Jordan, M.~I., and Petsche, T., editors, {\em Advances in Neural Information Processing Systems}, volume~9, page 800. The {MIT} Press. \bibitem[Weigend et~al., 1998]{nn-books:Weigend+AbuMostafa+Refenes:1998} Weigend, A.~S., Abu-Mostafa, Y.~S., and Refenes, A.-P.~N., editors (1998). \newblock {\em Decision Technologies for Financial Engineering: Proceedings of the Fourth International Conference on Neural Networks in the Capital Markets (NNCM'96)}, volume~7 of {\em Progress in Neural Processing series}. \newblock World Scientific. \bibitem[White, 1989]{White89} White, H. (1989). \newblock Learning in artificial neural networks: A statistical perspective. \newblock {\em Neural Computation}, 1:425--464. \bibitem[Williams and Barber, 1998]{ChrisWillams98} Williams, C. K.~I. and Barber, D. (1998). \newblock Bayesian classification with {G}aussian processes. \newblock {\em IEEE Transactions on Pattern Analysis and Machine Intelligence}, 20(12):1342--1351. \bibitem[Zhao and Atkeson, 1996]{zhao96implementing} Zhao, Y. and Atkeson, C.~G. (1996). \newblock Implementing projection pursuit learning. \newblock {\em IEEE Transactions on Neural Networks}, 7(2):362--373. \end{thebibliography}