 |
Benjamin
Piwowarski
,
Ingo
Frommholz
,
Yashar
Moshfeghi
,
Mounia
Lalmas
, and
Keith van
Rijsbergen
. Filtering documents with subspaces In Proceedings of the 32nd European Conference on Information Retrieval 2010.Bibtex @inproceedings{Piwowarski2010Filtering-documents, author = {
Benjamin
Piwowarski
}
, author = {
Ingo
Frommholz
}
, author = {
Yashar
Moshfeghi
}
, author = {
Mounia
Lalmas
}
, author = {
Keith van
Rijsbergen
}
, booktitle = {Proceedings of the 32nd European Conference on Information Retrieval}
, crossref = {ECIR2010}
, note = {Poster}
, short-title = {Proceedings of the 32nd ECIR Conference}
, title = {Filtering documents with subspaces}
, type = {International Conference}
, year = {2010}
} |
  |
Juan M.
Fernández-Luna
,
Juan F.
Huete
, and
Benjamin
Piwowarski
. Introduction to the special issue on Graphical Models and Information Retrieval In International Journal of Approximate Reasoning 50
(7).
July 2009,
pages 929–931.
Bibtex @article{Fernandez-Luna2009Introduction-to-the-special, author = {
Juan M.
Fernández-Luna
}
, author = {
Juan F.
Huete
}
, author = {
Benjamin
Piwowarski
}
, doi = {10.1016/j.ijar.2009.04.003}
, issn = {0888-613X}
, journal = {International Journal of Approximate Reasoning}
, month = {July}
, number = {7}
, pages = {929–931}
, title = {Introduction to the special issue on Graphical Models and Information Retrieval}
, url = {http://www.sciencedirect.com/science/article/B6V07-4W6YK3J-2/2/134387a47513ac1949938924abd63bb7}
, volume = {50}
, year = {2009}
} |
   |
Jovan
Pehcevski
and
Benjamin
Piwowarski
. Evaluation Metrics for Structured Text Retrieval In Encyclopedia of Database Systems
Edited by
M. Tamer
Özsu
.
Springer.
May 2009,
pages 1015–1024.
Bibtex @incollection{Pehcevski2009Evaluation-Metrics, author = {
Jovan
Pehcevski
}
, author = {
Benjamin
Piwowarski
}
, booktitle = {Encyclopedia of Database Systems}
, doi = {10.1007/978-0-387-39940-9}
, editor = {
M. Tamer
\"Ozsu
}
, editor = {
Ling
Liu
}
, month = {May}
, pages = {1015–1024}
, publisher = {Springer}
, series = {Encyclopedia of Database Systems}
, title = {Evaluation Metrics for Structured Text Retrieval}
, year = {2009}
} |
  |
Benjamin
Piwowarski
,
Georges
Dupret
, and
Rosie
Jones
. Mining User Web Search Activity with Layered Bayesian Networks or How to Capture a Click in its Context In Proceedings of the Second ACM International Conference on Web Search and Data Mining Barcelona, Spain, February 2009.Bibtex @inproceedings{Piwowarski2009Mining-User, address = {Barcelona, Spain}
, author = {
Benjamin
Piwowarski
}
, author = {
Georges
Dupret
}
, author = {
Rosie
Jones
}
, booktitle = {Proceedings of the Second ACM International Conference on Web Search and Data Mining}
, crossref = {WSDM2009}
, doi = {10.1145/1498759.1498823}
, editor = {
Ricardo A.
Baeza-Yates
}
, editor = {
Paolo
Boldi
}
, editor = {
Berthier A.
Ribeiro-Neto
}
, editor = {
Berkant Barla
Cambazoglu
}
, keywords = {Web}
, keywords = {Information Retrieval}
, keywords = {Web Log Mining}
, month = {February}
, private = {Yes}
, publisher = {ACM}
, title = {Mining User Web Search Activity with Layered Bayesian Networks or How to Capture a Click in its Context}
, type = {International Conference}
, year = {2009}
} |
  |
Yashar
Moshfeghi
,
Deepak
Agarwal
,
Benjamin
Piwowarski
, and
Joemon M.
Jose
. Movie Recommender: Semantically Enriched Unified Relevance Model for Rating Prediction in Collaborative Filtering In ECIR Toulouse, France, March 2009,
pages 54–65.
Bibtex @inproceedings{Moshfeghi2009Movie-Recommender:, address = {Toulouse, France}
, author = {
Yashar
Moshfeghi
}
, author = {
Deepak
Agarwal
}
, author = {
Benjamin
Piwowarski
}
, author = {
Joemon M.
Jose
}
, booktitle = {ECIR}
, crossref = {ECIR2009}
, doi = {10.1007/978-3-642-00958-7_8}
, editor = {
Mohand
Boughanem
}
, editor = {
Catherine
Berrut
}
, editor = {
Josiane
Mothe
}
, editor = {
Chantal
Soulé-Dupuy
}
, isbn = {978-3-642-00957-0}
, month = {mar}
, pages = {54–65}
, private = {Yes}
, publisher = {Springer}
, series = {Lecture Notes in Computer Science}
, short-title = {Proceedings of the 31st ECIR Conference}
, title = {Movie Recommender: Semantically Enriched Unified Relevance Model for Rating Prediction in Collaborative Filtering}
, type = {International Conference}
, volume = {5478}
, year = {2009}
} |
  |
Benjamin
Piwowarski
,
Andrew
Trotman
, and
Mounia
Lalmas
. Sound and Complete Relevance Assessments for XML Retrieval In ACM Transactions On Information Systems 27
(1).
January 2009.Bibtex @article{Piwowarski2009Sound-and-Complete, abstract = {In information retrieval research, comparing retrieval approaches requires test collections consisting of documents, user requests and relevance assessments. Obtaining relevance assessments that are as sound and complete as possible is crucial for the comparison of retrieval approaches. In XML retrieval, the problem of obtaining sound and complete relevance assessments is further complicated by the structural relationships between retrieval results. A major difference between XML retrieval and flat document retrieval is that the relevance of elements (the retrievable units) is not independent of that of related elements. This has major consequences for the gathering of relevance assessments. This paper describes investigations into the creation of sound and complete relevance assessments for the evaluation of content-oriented XML retrieval as carried out at INEX, the evaluation campaign for XML retrieval. The campaign, now in its seventh year, has had three substantially different approaches to gather assessments and has finally settled on a highlighting method for marking relevant passages within documents - even though the objective is to collect assessments at element level. The different methods of gathering assessments at INEX are discussed and contrasted. The highlighting method is shown to be the most reliable of the methods.}
, author = {
Benjamin
Piwowarski
}
, author = {
Andrew
Trotman
}
, author = {
Mounia
Lalmas
}
, journal = {ACM Transactions On Information Systems}
, month = {jan}
, number = {1}
, private = {Yes}
, title = {Sound and Complete Relevance Assessments for XML Retrieval}
, type = {Journal}
, volume = {27}
, year = {2009}
}Abstract In information retrieval research, comparing retrieval approaches requires test collections consisting of documents, user requests and relevance assessments. Obtaining relevance assessments that are as sound and complete as possible is crucial for the comparison of retrieval approaches. In XML retrieval, the problem of obtaining sound and complete relevance assessments is further complicated by the structural relationships between retrieval results. A major difference between XML retrieval and flat document retrieval is that the relevance of elements (the retrievable units) is not independent of that of related elements. This has major consequences for the gathering of relevance assessments. This paper describes investigations into the creation of sound and complete relevance assessments for the evaluation of content-oriented XML retrieval as carried out at INEX, the evaluation campaign for XML retrieval. The campaign, now in its seventh year, has had three substantially different approaches to gather assessments and has finally settled on a highlighting method for marking relevant passages within documents - even though the objective is to collect assessments at element level. The different methods of gathering assessments at INEX are discussed and contrasted. The highlighting method is shown to be the most reliable of the methods. |
 |
Benjamin
Piwowarski
and
Mounia
Lalmas
. Structured Information Retrieval and Quantum Theory In Proceedings of the Third Quantum Interaction Symposium March 2009. |
 |
Benjamin
Piwowarski
and
Mounia
Lalmas
. A Quantum-based Model for Interactive Information Retrieval In Proceedings of the 2nd International Conference on the Theory of Information Retrieval Sep 2009.Bibtex @inproceedings{Piwowarski2009B-A-Quantum-based-Model, author = {
Benjamin
Piwowarski
}
, author = {
Mounia
Lalmas
}
, booktitle = {Proceedings of the 2nd International Conference on the Theory of Information Retrieval}
, crossref = {ICTIR2009}
, editor = {
Leif
Azzopardi
}
, editor = {
Gabriella
Kazai
}
, editor = {
Stephen E.
Robertson
}
, editor = {
Stefan M.
Rüger
}
, editor = {
Milad
Shokouhi
}
, editor = {
Dawei
Song
}
, editor = {
Emine
Yilmaz
}
, group = {Quantum Information; Information Retrieval}
, month = {Sep}
, publisher = {Springer}
, short-title = {Proceedings of the 2nd ICTIR conference}
, title = {A Quantum-based Model for Interactive Information Retrieval}
, type = {International Conference}
, volume = {5766}
, year = {2009}
} |
  |
Benjamin
Piwowarski
and
Mounia
Lalmas
. A Quantum-based Model for Interactive Information Retrieval (extended version) In ArXiv e-prints
no. 0906.4026.
September 2009.Bibtex @article{Piwowarski2009A-Quantum-based-Model, author = {
Benjamin
Piwowarski
}
, author = {
Mounia
Lalmas
}
, group = {Quantum Information; Information Retrieval}
, journal = {ArXiv e-prints}
, month = {September}
, number = {0906.4026}
, title = {A Quantum-based Model for Interactive Information Retrieval (extended version)}
, year = {2009}
} |
  |
Georges
Dupret
and
Benjamin
Piwowarski
. A user browsing model to predict search engine click data from past observations In SIGIR 2008 Singapore, July 2008.Bibtex @inproceedings{Dupret2008A-user-browsing, address = {Singapore}
, author = {
Georges
Dupret
}
, author = {
Benjamin
Piwowarski
}
, bibsource = {DBLP, http://dblp.uni-trier.de}
, booktitle = {SIGIR 2008}
, crossref = {SIGIR2008}
, editor = {
Sung-Hyon
Myaeng
}
, editor = {
Douglas W.
Oard
}
, editor = {
Fabrizio
Sebastiani
}
, editor = {
Tat-Seng
Chua
}
, editor = {
Mun-Kew
Leong
}
, isbn = {978-1-60558-164-4}
, location = {Singapore, Singapore}
, month = {July}
, publisher = {ACM}
, short-title = {Proceedings of the 31st Annual International ACM SIGIR}
, title = {A user browsing model to predict search engine click data from past observations}
, type = {International Conference}
, year = {2008}
} |
  |
Olivier
Motelet
,
Benjamin
Piwowarski
,
Georges
Dupret
,
Jose A.
Pino
, and
Nelson
Baloian
. Enhancing Educational-Material Retrieval using Authored-Lesson Metadata In Fourteenth String Processing and Information Retrieval Symposium (SPIRE 2007) Santiago, Chile, October 2007.Bibtex @inproceedings{Motelet2007Enhancing-Educational-Material, address = {Santiago, Chile}
, author = {
Olivier
Motelet
}
, author = {
Benjamin
Piwowarski
}
, author = {
Georges
Dupret
}
, author = {
Jose A.
Pino
}
, author = {
Nelson
Baloian
}
, booktitle = {Fourteenth String Processing and Information Retrieval Symposium (SPIRE 2007)}
, month = {October}
, owner = {bpiwowar}
, timestamp = {2007.07.20}
, title = {Enhancing Educational-Material Retrieval using Authored-Lesson Metadata}
, type = {International Conference}
, year = {2007}
} |
   |
Georges
Dupret
,
Vanessa
Murdock
, and
Benjamin
Piwowarski
. Web Search Engine Evaluation using Clickthrough Data and a User Model In Query Log Analysis: Social and Technological Challenges 2007.Bibtex @inproceedings{Dupret2007Web-Search-Engine, abstract = {Traditional search engine evaluation relies on a list of query document pairs along with a score reflecting the document relevance to the query. The score is generally a human assessment, but nothing is said explicitly about the actual user behavior. In this paper we illustrate with a toy model that once the user behavior is agreed upon, the human assessment can be eliminated and the engine performance can be evaluated based on the clickthrough data of past users.}
, author = {
Georges
Dupret
}
, author = {
Vanessa
Murdock
}
, author = {
Benjamin
Piwowarski
}
, booktitle = {Query Log Analysis: Social and Technological Challenges}
, title = {Web Search Engine Evaluation using Clickthrough Data and a User Model}
, type = {International Workshop}
, year = {2007}
}Abstract Traditional search engine evaluation relies on a list of query document pairs along with a score reflecting the document relevance to the query. The score is generally a human assessment, but nothing is said explicitly about the actual user behavior. In this paper we illustrate with a toy model that once the user behavior is agreed upon, the human assessment can be eliminated and the engine performance can be evaluated based on the clickthrough data of past users. |
 |
Olivier
Motelet
,
Nelson
Baloian
,
Benjamin
Piwowarski
, and
Jose A.
Pino
. Taking advantage of the semantics of a lesson graph based on learning objects In The 13th International Conference on Artificial Intelligence in Education (AIED 2007) July 2007.Bibtex @inproceedings{Motelet2007Taking-advantage, author = {
Olivier
Motelet
}
, author = {
Nelson
Baloian
}
, author = {
Benjamin
Piwowarski
}
, author = {
Jose A.
Pino
}
, booktitle = {The 13th International Conference on Artificial Intelligence in Education (AIED 2007)}
, month = {July}
, owner = {bpiwowar}
, publisher = {IOS Press}
, timestamp = {2007.05.28}
, title = {Taking advantage of the semantics of a lesson graph based on learning objects}
, type = {International Conference}
, year = {2007}
} |
 |
Gabriella
Kazai
,
Benjamin
Piwowarski
, and
Stephen E.
Robertson
. Effort Precision and Gain-recall based on a probabilistic navigation model In 1st International Conference on the Theory of Information Retrieval October 2007.Bibtex @inproceedings{Kazai2007Effort-Precision, author = {
Gabriella
Kazai
}
, author = {
Benjamin
Piwowarski
}
, author = {
Stephen E.
Robertson
}
, booktitle = {1st International Conference on the Theory of Information Retrieval}
, month = {October}
, owner = {bpiwowar}
, private = {Yes}
, timestamp = {2007.10.01}
, title = {Effort Precision and Gain-recall based on a probabilistic navigation model}
, type = {International Conference}
, year = {2007}
} |
   |
Benjamin
Piwowarski
and
Hugo
Zaragoza
. Predictive User Click Models Based on Click-through History In Proceedings of the Sixteenth Conference on Information and Knowledge Management (CIKM 2007) Lisbon, Portugal, November 2007,
pages 175–182.
Bibtex @inproceedings{Piwowarski2007Predictive-User, abstract = {Web search engines consistently collect information about users' interaction with the system: they record the query they issued, the URL of presented and selected documents along with their ranking. This information is very valuable: It is a poll over millions of users on the most various topics and it has been used in many ways to mine users' interests and preferences. Query logs have the potential to partially alleviate the search engines from thousands of searches by providing a way to predict answers for a subset of queries and users without knowing the content of a document. Even if the predicted result is at rank one, this analysis might be of interest: If there is enough confidence on a user's click, we might redirect the user directly to the page whose link would be clicked. In this paper, we present three different models for predicting user clicks, ranging from most specific ones (using only past user history for the query) to very general ones (aggregating data over all users for a given query). The former model has a very high precision at low recall values, while the latter can achieve high recalls. We show that it is possible to combine the different models to predict with high accuracy (over 90%) a high subset of query sessions (24% of all the sessions).}
, address = {Lisbon, Portugal}
, author = {
Benjamin
Piwowarski
}
, author = {
Hugo
Zaragoza
}
, booktitle = {Proceedings of the Sixteenth Conference on Information and Knowledge Management (CIKM 2007)}
, month = {November}
, owner = {bpiwowar}
, pages = {175–182}
, publisher = {ACM}
, timestamp = {2007.07.20}
, title = {Predictive User Click Models Based on Click-through History}
, type = {International Conference}
, year = {2007}
}Abstract Web search engines consistently collect information about users' interaction with the system: they record the query they issued, the URL of presented and selected documents along with their ranking. This information is very valuable: It is a poll over millions of users on the most various topics and it has been used in many ways to mine users' interests and preferences. Query logs have the potential to partially alleviate the search engines from thousands of searches by providing a way to predict answers for a subset of queries and users without knowing the content of a document. Even if the predicted result is at rank one, this analysis might be of interest: If there is enough confidence on a user's click, we might redirect the user directly to the page whose link would be clicked. In this paper, we present three different models for predicting user clicks, ranging from most specific ones (using only past user history for the query) to very general ones (aggregating data over all users for a given query). The former model has a very high precision at low recall values, while the latter can achieve high recalls. We show that it is possible to combine the different models to predict with high accuracy (over 90%) a high subset of query sessions (24% of all the sessions). |
    |
Benjamin
Piwowarski
,
Patrick
Gallinari
, and
Georges
Dupret
. An Extension of Precision-Recall with User Modelling (PRUM): Application to XML Retrieval In ACM Transactions On Information Systems 25
(1).
2007.Bibtex @article{Piwowarski2007An-Extension-of-Precision-Recall, abstract = {Standard Information Retrieval (IR) metrics are not well suited for new paradigms like XML or Web IR in which retrievable information units are document elements and/or sets of related documents. Part of the problem stems from the classical hypotheses on the user models: They do not take into account the structural or logical context of document elements or the possibility of navigation between units. This article proposes an explicit and formal user model that encompasses a large variety of user behaviors. Based on this model, we extend the probabilistic precision-recall metric to deal with the new IR paradigms.}
, author = {
Benjamin
Piwowarski
}
, author = {
Patrick
Gallinari
}
, author = {
Georges
Dupret
}
, doi = {10.1145/1198296.1198297}
, journal = {ACM Transactions On Information Systems}
, keywords = {XML}
, keywords = {Information Retrieval}
, keywords = {Evaluation}
, number = {1}
, owner = {bpiwowar}
, timestamp = {2005.11.16}
, title = {An Extension of Precision-Recall with User Modelling (PRUM): Application to XML Retrieval}
, type = {Journal}
, volume = {25}
, year = {2007}
}Abstract Standard Information Retrieval (IR) metrics are not well suited for new paradigms like XML or Web IR in which retrievable information units are document elements and/or sets of related documents. Part of the problem stems from the classical hypotheses on the user models: They do not take into account the structural or logical context of document elements or the possibility of navigation between units. This article proposes an explicit and formal user model that encompasses a large variety of user behaviors. Based on this model, we extend the probabilistic precision-recall metric to deal with the new IR paradigms. |
 |
Holger
Bast
,
Georges
Dupret
,
Debapriyo
Majumdar
, and
Benjamin
Piwowarski
. Discovering a Term Taxonomy from Term Similarities Using Principal Component Analysis In Semantics, Web and Mining, Joint International Workshops, EWMF 2005 and KDO 2005 Porto, Portugal, 2006,
pages 103–120.
Bibtex @inproceedings{Bast2006Discovering-a-Term, address = {Porto, Portugal}
, author = {
Holger
Bast
}
, author = {
Georges
Dupret
}
, author = {
Debapriyo
Majumdar
}
, author = {
Benjamin
Piwowarski
}
, booktitle = {Semantics, Web and Mining, Joint International Workshops, EWMF 2005 and KDO 2005}
, editor = {
Markus
Ackermann
}
, editor = {
Bettina
Berendt
}
, editor = {
Marko
Grobelnik
}
, editor = {
Andreas
Hotho
}
, editor = {
Dunja
Mladenic
}
, editor = {
Giovanni
Semeraro
}
, editor = {
Myra
Spiliopoulou
}
, editor = {
Gerd
Stumme
}
, editor = {
Vojtech
Svátek
}
, editor = {
Maarten van
Someren
}
, pages = {103–120}
, publisher = {Springer}
, series = {Lecture Notes in Computer Science}
, title = {Discovering a Term Taxonomy from Term Similarities Using Principal Component Analysis}
, type = {International Workshop}
, volume = {4289}
, year = {2006}
} |
 |
Georges
Dupret
and
Benjamin
Piwowarski
. Principal Components for Automatic Term Hierarchy Building In Proceedings of the 13th International Symposium on String Processing and Information Retrieval (SPIRE 2006) 2006,
pages 37–48.
Bibtex @inproceedings{Dupret2006Principal-Components, author = {
Georges
Dupret
}
, author = {
Benjamin
Piwowarski
}
, booktitle = {Proceedings of the 13th International Symposium on String Processing and Information Retrieval (SPIRE 2006)}
, pages = {37–48}
, private = {Yes}
, publisher = {Springer}
, series = {LNCS 4209}
, title = {Principal Components for Automatic Term Hierarchy Building}
, type = {International Conference}
, year = {2006}
} |
    |
Benjamin
Piwowarski
and
Georges
Dupret
. Evaluation in (XML) Information Retrieval: Expected Precision-Recall with User Modelling (EPRUM) In Proceedings of the 29th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval Seattle, Washington, USA, August 2006,
pages 260–267.
|
 |
Georges
Dupret
,
Benjamin
Piwowarski
,
C.
Hurtado
, and
M.
Mendoza
. A Statistical Model of Query Log Generation In Proceedings of the 13th International Symposium on String Processing and Information Retrieval (SPIRE 2006) 2006,
pages 217–228.
Bibtex @inproceedings{Dupret2006A-Statistical-Model, author = {
Georges
Dupret
}
, author = {
Benjamin
Piwowarski
}
, author = {
C.
Hurtado
}
, author = {
M.
Mendoza
}
, booktitle = {Proceedings of the 13th International Symposium on String Processing and Information Retrieval (SPIRE 2006)}
, pages = {217–228}
, publisher = {Springer}
, series = {LNCS 4209}
, title = {A Statistical Model of Query Log Generation}
, type = {International Conference}
, year = {2006}
} |
    |
Benjamin
Piwowarski
and
Patrick
Gallinari
. A Bayesian Network for XML Information Retrieval: Searching and Learning with the INEX Collection In Information Retrieval 8
(4).
December 2005,
pages 655–681.
Bibtex @article{Piwowarski2005A-Bayesian-Network, abstract = {Most recent document standards like XML rely on structured representations. On the other hand, current information retrieval systems have been developed for flat document representations and cannot be easily extended to cope with more complex document types. The design of such systems is still an open problem. We present a new model for structured document retrieval which allows computing scores of document parts. This model is based on Bayesian networks whose conditional probabilities are learnt from a labelled collection of structured documents – which is composed of documents, queries and their associated assessments. Training these models is a complex machine learning task and is not standard. This is the focus of the paper: we propose here to train the structured Bayesian Network model using a cross-entropy training criterion. Results are presented on the INEX corpus of XML documents.}
, author = {
Benjamin
Piwowarski
}
, author = {
Patrick
Gallinari
}
, doi = {10.1007/s10791-005-0751-6}
, journal = {Information Retrieval}
, month = {December}
, number = {4}
, pages = {655–681}
, title = {A Bayesian Network for XML Information Retrieval: Searching and Learning with the INEX Collection}
, type = {Journal}
, volume = {8}
, year = {2005}
}Abstract Most recent document standards like XML rely on structured representations. On the other hand, current information retrieval systems have been developed for flat document representations and cannot be easily extended to cope with more complex document types. The design of such systems is still an open problem. We present a new model for structured document retrieval which allows computing scores of document parts. This model is based on Bayesian networks whose conditional probabilities are learnt from a labelled collection of structured documents – which is composed of documents, queries and their associated assessments. Training these models is a complex machine learning task and is not standard. This is the focus of the paper: we propose here to train the structured Bayesian Network model using a cross-entropy training criterion. Results are presented on the INEX corpus of XML documents. |
   |
Huyen-Trang
Vu
,
Benjamin
Piwowarski
, and
Patrick
Gallinari
. Filtering in XML Retrieval: a Prospective Analysis In XML and Information Retrieval workshop of SIGIR 2004 University of Sheffield, UK, July 2004.Bibtex @inproceedings{Vu2004Filtering-in-XML-Retrieval:, abstract = {In XML retrieval paradigm, elements inside a document can be returned as answers to a user request. Since the information in an element is more specific than in a whole document, this might reduce the user effort in finding relevant information. However, as XML documents are composed of nested elements, many of which being possibly relevant to the user information need, retrieval systems must take care of the overlap issue before showing answers to the user. In this paper, we investigate how to disallow overlapping results by considering it as a filtering problem.}
, address = {University of Sheffield, UK}
, author = {
Huyen-Trang
Vu
}
, author = {
Benjamin
Piwowarski
}
, author = {
Patrick
Gallinari
}
, booktitle = {XML and Information Retrieval workshop of SIGIR 2004}
, month = {July}
, title = {Filtering in XML Retrieval: a Prospective Analysis}
, type = {International Workshop}
, year = {2004}
}Abstract In XML retrieval paradigm, elements inside a document can be returned as answers to a user request. Since the information in an element is more specific than in a whole document, this might reduce the user effort in finding relevant information. However, as XML documents are composed of nested elements, many of which being possibly relevant to the user information need, retrieval systems must take care of the overlap issue before showing answers to the user. In this paper, we investigate how to disallow overlapping results by considering it as a filtering problem. |
   |
Benjamin
Piwowarski
and
Mounia
Lalmas
. Interface pour l'évaluation de systèmes de recherche sur des documents XML In Premiere COnference en Recherche d'Information et Applications (CORIA'04) Toulouse, France, March 2004.Bibtex @inproceedings{Piwowarski2004Interface-pour, abstract = {L'évaluation des systèmes de Recherche d'Information est depuis le début un des piliers de l'évolution de ce domaine. La qualité de l'évaluation est d'une importance capitale puisqu'elle permet de discriminer les différents modèles entre eux. Il est donc primordial de pouvoir constituer des corpus où les questions et leurs jugements de pertinence associés sont de qualité. Alors qu'avec des documents plats les méthodes sont bien établies, ce n'est plus le cas avec des documents structurés de type XML. Il est donc nécessaire de développer de nouvelle façon d'évaluer. Nous présentons dans cet article l'interface utilisée lors de la campagne INEX 2003 qui permet d'évaluer de façon la plus consistante et la plus exhaustive possible les documents XML.}
, address = {Toulouse, France}
, author = {
Benjamin
Piwowarski
}
, author = {
Mounia
Lalmas
}
, booktitle = {Premiere COnference en Recherche d'Information et Applications (CORIA'04)}
, month = {March}
, title = {Interface pour l'évaluation de systèmes de recherche sur des documents XML}
, type = {National conference}
, year = {2004}
}Abstract L'évaluation des systèmes de Recherche d'Information est depuis le début un des piliers de l'évolution de ce domaine. La qualité de l'évaluation est d'une importance capitale puisqu'elle permet de discriminer les différents modèles entre eux. Il est donc primordial de pouvoir constituer des corpus où les questions et leurs jugements de pertinence associés sont de qualité. Alors qu'avec des documents plats les méthodes sont bien établies, ce n'est plus le cas avec des documents structurés de type XML. Il est donc nécessaire de développer de nouvelle façon d'évaluer. Nous présentons dans cet article l'interface utilisée lors de la campagne INEX 2003 qui permet d'évaluer de façon la plus consistante et la plus exhaustive possible les documents XML. |
   |
Benjamin
Piwowarski
and
Patrick
Gallinari
. An algebra for probabilistic XML Retrieval In The First Twente Data Management Workshop SIKS.
Enschede, The Netherlands, June 2004.Bibtex @inproceedings{Piwowarski2004An-algebra-for-probabilistic, abstract = {In this paper, we describe a new algebra for XML retrieval. We first describe how to transform an XPath-like query in our algebra. The latter contains a vague predicate, about, which defines a set of document parts within an XML document that fulfill a query expressed as in ``flat'' Information Retrieval - a query that contains only constraints on content but not on structure. This predicate is evaluated in a probabilistic way: we thus need a probabilistic interpretation of our algebra. Answers to query needs expressed with vague content and vague structure constraints can then be evaluated.}
, address = {Enschede, The Netherlands}
, author = {
Benjamin
Piwowarski
}
, author = {
Patrick
Gallinari
}
, booktitle = {The First Twente Data Management Workshop}
, month = {June}
, organization = {SIKS}
, title = {An algebra for probabilistic XML Retrieval}
, type = {International Workshop}
, year = {2004}
}Abstract In this paper, we describe a new algebra for XML retrieval. We first describe how to transform an XPath-like query in our algebra. The latter contains a vague predicate, about, which defines a set of document parts within an XML document that fulfill a query expressed as in “flat” Information Retrieval - a query that contains only constraints on content but not on structure. This predicate is evaluated in a probabilistic way: we thus need a probabilistic interpretation of our algebra. Answers to query needs expressed with vague content and vague structure constraints can then be evaluated. |
    |
Benjamin
Piwowarski
and
Mounia
Lalmas
. Providing Consistent and Exhaustive Relevance Assessments for XML Retrieval Evaluation In Proceedings of the Thirteenth Conference on Information and Knowledge Management (CIKM 2004) Washington D.C., U.S.A., November 2004.Bibtex @inproceedings{Piwowarski2004Providing-Consistent, abstract = {Comparing retrieval approaches requires test collections, which consist of documents, queries and relevance assessments. Obtaining consistent and exhaustive relevance assessments is crucial for the appropriate comparison of retrieval approaches. Whereas the evaluation methodology for flat text retrieval approaches is well established, the evaluation of XML retrieval approaches is a research issue. This is because XML documents are composed of nested components, which cannot be considered as independent in terms of relevance. This paper describes the methodology adopted in INEX (the INitiative for the Evaluation of XML Retrieval) to ensure consistent and exhaustive relevance assessments.}
, address = {Washington D.C., U.S.A.}
, author = {
Benjamin
Piwowarski
}
, author = {
Mounia
Lalmas
}
, booktitle = {Proceedings of the Thirteenth Conference on Information and Knowledge Management (CIKM 2004)}
, doi = {10.1145/1031171.1031246}
, month = {November}
, title = {Providing Consistent and Exhaustive Relevance Assessments for XML Retrieval Evaluation}
, type = {International Conference}
, year = {2004}
}Abstract Comparing retrieval approaches requires test collections, which consist of documents, queries and relevance assessments. Obtaining consistent and exhaustive relevance assessments is crucial for the appropriate comparison of retrieval approaches. Whereas the evaluation methodology for flat text retrieval approaches is well established, the evaluation of XML retrieval approaches is a research issue. This is because XML documents are composed of nested components, which cannot be considered as independent in terms of relevance. This paper describes the methodology adopted in INEX (the INitiative for the Evaluation of XML Retrieval) to ensure consistent and exhaustive relevance assessments. |
 |
Benjamin
Piwowarski
and
Patrick
Gallinari
. Structure, recherche d'information et apprentissage Lyon, France, January 2003.Bibtex @inproceedings{Piwowarski2003Structure-recherche, address = {Lyon, France}
, author = {
Benjamin
Piwowarski
}
, author = {
Patrick
Gallinari
}
, month = {January}
, title = {Structure, recherche d'information et apprentissage}
, type = {National conference}
, year = {2003}
} |
   |
Benjamin
Piwowarski
. Techniques d'apprentissage pour le traitement d'informations structurées : application à la recherche d'information University Paris 6.
2003.Bibtex @thesis{Piwowarski2003Techniques-dapprentissage, abstract = {Dans le contexte de l'Accès à l'Information, la notion de document électronique a considérablement évoluée. En peu de temps, nous sommes passés d'un monde où la représentation dominante d'un document était constituée de la suite de ses mots ou de ses phrases à une représentation bien plus riche et structurée (multimédia). Cette évolution touche les communautés de la Recherche d'Information (RI), des Bases de Données et de l'Apprentissage Automatique qui sont celles qui sont au coeur de notre travail. Dans ce manuscrit, nous présentons un modèle complet de RI structurée basé sur les Réseaux Bayésiens (RB). Notre modèle est capable de répondre à des questions portant à la fois sur la structure et le contenu. Notre modèle peut également apprendre de manière automatique ses paramètres. Nous nous intéressons également à la définition d'une nouvelle mesure d'évaluation des systèmes de RI structurés.}
, address = {Paris, France}
, author = {
Benjamin
Piwowarski
}
, month = {July}
, school = {University Paris 6}
, title = {Techniques d'apprentissage pour le traitement d'informations structurées : application à la recherche d'information}
, year = {2003}
}Abstract Dans le contexte de l'Accès à l'Information, la notion de document électronique a considérablement évoluée. En peu de temps, nous sommes passés d'un monde où la représentation dominante d'un document était constituée de la suite de ses mots ou de ses phrases à une représentation bien plus riche et structurée (multimédia). Cette évolution touche les communautés de la Recherche d'Information (RI), des Bases de Données et de l'Apprentissage Automatique qui sont celles qui sont au coeur de notre travail. Dans ce manuscrit, nous présentons un modèle complet de RI structurée basé sur les Réseaux Bayésiens (RB). Notre modèle est capable de répondre à des questions portant à la fois sur la structure et le contenu. Notre modèle peut également apprendre de manière automatique ses paramètres. Nous nous intéressons également à la définition d'une nouvelle mesure d'évaluation des systèmes de RI structurés. |
   |
Benjamin
Piwowarski
and
Patrick
Gallinari
. A Machine Learning Model for Information Retrieval with Structured Documents In Machine Learning and Data Mining in Pattern Recognition Leipzig, Germany, July 2003,
pages 425–438.
Bibtex @inproceedings{Piwowarski2003A-Machine-Learning, abstract = {Most recent document standards rely on structured representations. On the other hand, current information retrieval systems have been developed for flat document representations and cannot be easily extended to cope with more complex document types. Only a few models have been proposed for handling structured documents, and the design of such systems is still an open problem. We present here a new model for structured document retrieval which allows to compute and to combine the scores of document parts. It is based on bayesian networks and allows for learning the model parameters in the presence of incomplete data. We present an application of this model for ad-hoc retrieval and evaluate its performances on a small structured collection. The model can also be extended to cope with other tasks such as interactive navigation in structured documents or corpus}
, address = {Leipzig, Germany}
, author = {
Benjamin
Piwowarski
}
, author = {
Patrick
Gallinari
}
, booktitle = {Machine Learning and Data Mining in Pattern Recognition}
, editor = {
Petra
Perner
}
, month = {July}
, pages = {425--438}
, publisher = {Springer Verlag}
, title = {A Machine Learning Model for Information Retrieval with Structured Documents}
, type = {International Conference}
, year = {2003}
}Abstract Most recent document standards rely on structured representations. On the other hand, current information retrieval systems have been developed for flat document representations and cannot be easily extended to cope with more complex document types. Only a few models have been proposed for handling structured documents, and the design of such systems is still an open problem. We present here a new model for structured document retrieval which allows to compute and to combine the scores of document parts. It is based on bayesian networks and allows for learning the model parameters in the presence of incomplete data. We present an application of this model for ad-hoc retrieval and evaluate its performances on a small structured collection. The model can also be extended to cope with other tasks such as interactive navigation in structured documents or corpus |
   |
Benjamin
Piwowarski
. Working group report: the Assessment Tool In INitiative for the Evaluation of XML Retrieval (INEX). Proceedings of the Second INEX Workshop Dagstuhl, Germany, December 2003.Bibtex @inproceedings{Piwowarski2003Working-group, abstract = {This paper is the report of the working group on the evaluation assessment interface that was used in INEX'03. This paper describes the changes that are planned for INEX'04 and the different issues that were raised during the working group session.}
, address = {Dagstuhl, Germany}
, author = {
Benjamin
Piwowarski
}
, booktitle = {INitiative for the Evaluation of XML Retrieval (INEX). Proceedings of the Second INEX Workshop}
, editor = {
Norbert
Fuhr
}
, editor = {
Mounia
Lalmas
}
, editor = {
Saadia
Malik
}
, month = {December}
, title = {Working group report: the Assessment Tool}
, type = {International Workshop}
, year = {2003}
}Abstract This paper is the report of the working group on the evaluation assessment interface that was used in INEX'03. This paper describes the changes that are planned for INEX'04 and the different issues that were raised during the working group session. |
   |
Benjamin
Piwowarski
,
Huyen-Trang
Vu
, and
Patrick
Gallinari
. Bayesian Networks and INEX'03 In INitiative for the Evaluation of XML Retrieval (INEX). Proceedings of the Second INEX Workshop Dagstuhl, Germany, December 2003.Bibtex @inproceedings{Piwowarski2003Bayesian-Networks, abstract = {We present a Bayesian framework for XML document retrieval. This framework allows us to consider content-only (CO) queries. We perform the retrieval task using inference in our network. The proposed model can adapt to a specific corpus through parameter learning and it uses a grammar to speed up the retrieval process in large or distributed databases. We also experimented list filtering to avoid overlap in the retrieved element list.}
, address = {Dagstuhl, Germany}
, author = {
Benjamin
Piwowarski
}
, author = {
Huyen-Trang
Vu
}
, author = {
Patrick
Gallinari
}
, booktitle = {INitiative for the Evaluation of XML Retrieval (INEX). Proceedings of the Second INEX Workshop}
, editor = {
Norbert
Fuhr
}
, editor = {
Mounia
Lalmas
}
, editor = {
Saadia
Malik
}
, month = {December}
, title = {Bayesian Networks and INEX'03}
, type = {International Workshop}
, year = {2003}
}Abstract We present a Bayesian framework for XML document retrieval. This framework allows us to consider content-only (CO) queries. We perform the retrieval task using inference in our network. The proposed model can adapt to a specific corpus through parameter learning and it uses a grammar to speed up the retrieval process in large or distributed databases. We also experimented list filtering to avoid overlap in the retrieved element list. |
 |
Gabriella
Kazai
,
Mounia
Lalmas
, and
Benjamin
Piwowarski
. INEX Guidelines for Topic Development In Proceedings of INEX 2003 2003.Bibtex @inproceedings{Kazai2003INEX-Guidelines, author = {
Gabriella
Kazai
}
, author = {
Mounia
Lalmas
}
, author = {
Benjamin
Piwowarski
}
, booktitle = {Proceedings of INEX 2003}
, crossref = {INEX2003}
, editor = {
Norbert
Fuhr
}
, editor = {
Mounia
Lalmas
}
, editor = {
Saadia
Malik
}
, keywords = {Evaluation}
, owner = {bpiwowar}
, title = {INEX Guidelines for Topic Development}
, type = {International Workshop}
, year = {2003}
} |
   |
Benjamin
Piwowarski
and
Patrick
Gallinari
. Expected Ratio of Relevant Units: A Measure for Structured Information Retrieval In INitiative for the Evaluation of XML Retrieval (INEX). Proceedings of the Second INEX Workshop Dagstuhl, Germany, December 2003.Bibtex @inproceedings{Piwowarski2003Expected-Ratio, abstract = {Since the 60's, evaluation has been a key problem for Information Retrieval (IR) systems and has been extensively discussed in the IR community. New IR paradigms, like Structured Information Retrieval (SIR), make classical evaluation measures inappropriate. A few tentative extensions to these measures has been proposed but are also inadequate. We do propose in this paper a new measure which is a generalisation of recall. This measure takes into account the specificity of SIR, when elements to be retrieved are linked by structural relationships. We show an instantiation of this measure on the INEX database and present experiments to show how well it is adapted to SIR evaluation.}
, address = {Dagstuhl, Germany}
, author = {
Benjamin
Piwowarski
}
, author = {
Patrick
Gallinari
}
, booktitle = {INitiative for the Evaluation of XML Retrieval (INEX). Proceedings of the Second INEX Workshop}
, crossref = {INEX2003}
, editor = {
Norbert
Fuhr
}
, editor = {
Mounia
Lalmas
}
, editor = {
Saadia
Malik
}
, keywords = {Evaluation}
, month = {December}
, owner = {bpiwowar}
, title = {Expected Ratio of Relevant Units: A Measure for Structured Information Retrieval}
, type = {International Workshop}
, year = {2003}
}Abstract Since the 60's, evaluation has been a key problem for Information Retrieval (IR) systems and has been extensively discussed in the IR community. New IR paradigms, like Structured Information Retrieval (SIR), make classical evaluation measures inappropriate. A few tentative extensions to these measures has been proposed but are also inadequate. We do propose in this paper a new measure which is a generalisation of recall. This measure takes into account the specificity of SIR, when elements to be retrieved are linked by structural relationships. We show an instantiation of this measure on the INEX database and present experiments to show how well it is adapted to SIR evaluation. |
  |
Benjamin
Piwowarski
and
Patrick
Gallinari
. A Bayesian Network Model for Page Retrieval in a Hierarchically Structured Collection In XML Workshop of the 25th ACM SIGIR Conference Tampere, Finland, August 2002.Bibtex @inproceedings{Piwowarski2002A-Bayesian-Network, abstract = {Most recent document standards rely on structured representations. Nevertheless, current information retrieval systems have been developed for flat document representations and cannot be easily extended to cope with more complex document types. Only a few models have been proposed for handling structured documents, and the design of such systems is still an open problem. We present here a new model for structured document retrieval which allows to compute and to combine the scores of document parts. It is based on bayesian networks and makes use of machine learning algorithms for learning the model parameters in the presence of incomplete data. We present an application of this model for ad-hoc retrieval and evaluate its performances on a small structured collection.}
, address = {Tampere, Finland}
, author = {
Benjamin
Piwowarski
}
, author = {
Patrick
Gallinari
}
, booktitle = {XML Workshop of the 25th ACM SIGIR Conference}
, month = {August}
, title = {A Bayesian Network Model for Page Retrieval in a Hierarchically Structured Collection}
, type = {International Workshop}
, year = {2002}
}Abstract Most recent document standards rely on structured representations. Nevertheless, current information retrieval systems have been developed for flat document representations and cannot be easily extended to cope with more complex document types. Only a few models have been proposed for handling structured documents, and the design of such systems is still an open problem. We present here a new model for structured document retrieval which allows to compute and to combine the scores of document parts. It is based on bayesian networks and makes use of machine learning algorithms for learning the model parameters in the presence of incomplete data. We present an application of this model for ad-hoc retrieval and evaluate its performances on a small structured collection. |
  |
Benjamin
Piwowarski
,
Georges-Etienne
Faure
, and
Patrick
Gallinari
. Bayesian networks and INEX In Proceedings of the First Annual Workshop of the Initiative for the Evaluation of XML retrieval (INEX) Dagstuhl, Germany, December 2002.Bibtex @inproceedings{Piwowarski2002Bayesian-networks, abstract = {We present a bayesian framework for XML document retrieval. This framework allows us to consider content only and content and structure queries. We perform the retrieval task using inference in our network. Our model can adapt to a specific corpora through parameter learning.}
, address = {Dagstuhl, Germany}
, author = {
Benjamin
Piwowarski
}
, author = {
Georges-Etienne
Faure
}
, author = {
Patrick
Gallinari
}
, booktitle = {Proceedings of the First Annual Workshop of the Initiative for the Evaluation of XML retrieval (INEX)}
, month = {December}
, publisher = {ERCIM}
, series = {DELOS workshop}
, title = {Bayesian networks and INEX}
, type = {International Workshop}
, year = {2002}
}Abstract We present a bayesian framework for XML document retrieval. This framework allows us to consider content only and content and structure queries. We perform the retrieval task using inference in our network. Our model can adapt to a specific corpora through parameter learning. |
   |
Benjamin
Piwowarski
,
Ludovic
Denoyer
, and
Patrick
Gallinari
. Un modèle pour la recherche d'information sur des documents structurés Saint-Malo, France, March 2002.Bibtex @inproceedings{Piwowarski2002Un-modele-pour, abstract = {Avec l'émergence de nouveaux standards comme le XHTML ou le DocBook où la structure des documents est apparente, la communauté de recherche d'information a commencé à s'intéresser à l'utilisation de cette nouvelle source d'information. La tâche est ardue, car il s'agit de concilier de sources d'informations de natures différentes, à savoir le texte et la structure. Quelques modèles ont fait leur apparition ; mais ces travaux manquent encore de maturité et n'utilisent la structure que d'une manière simple. Le cadre théorique que nous présentons dans ce papier a pour vocation de permettre une prise en compte de la structure dans les tâches de recherche documentaire et de catégorisation. Ce modèle basé sur l'utilisation de réseaux bayésiens est capable de s'adapter à de nouvelles bases de données grâce à des techniques d'apprentissage numérique. Il offre également des perspectives de développement intéressantes comme par exemple la navigation interactive dans une base de données.}
, address = {Saint-Malo, France}
, author = {
Benjamin
Piwowarski
}
, author = {
Ludovic
Denoyer
}
, author = {
Patrick
Gallinari
}
, month = {March}
, title = {Un modèle pour la recherche d'information sur des documents structurés}
, type = {National conference}
, year = {2002}
}Abstract Avec l'émergence de nouveaux standards comme le XHTML ou le DocBook où la structure des documents est apparente, la communauté de recherche d'information a commencé à s'intéresser à l'utilisation de cette nouvelle source d'information. La tâche est ardue, car il s'agit de concilier de sources d'informations de natures différentes, à savoir le texte et la structure. Quelques modèles ont fait leur apparition ; mais ces travaux manquent encore de maturité et n'utilisent la structure que d'une manière simple. Le cadre théorique que nous présentons dans ce papier a pour vocation de permettre une prise en compte de la structure dans les tâches de recherche documentaire et de catégorisation. Ce modèle basé sur l'utilisation de réseaux bayésiens est capable de s'adapter à de nouvelles bases de données grâce à des techniques d'apprentissage numérique. Il offre également des perspectives de développement intéressantes comme par exemple la navigation interactive dans une base de données. |
   |
Benjamin
Piwowarski
. Learning in Information Retrieval: a Probabilistic Differential Approach In Proceedings of the BCS-IRSG, 22nd Annual Colloquium on Information Retrieval Research Sidney Sussex College, Cambridge, England, April 2000. |
   |
Benjamin
Piwowarski
. Apprentissage et Recherche Documentaire : une Approche Probabiliste Différentielle In Colloque Francophone sur l'Apprentissage Automatique (CAP'2000) Saint-Etienne, France, June 2000.Bibtex @inproceedings{Piwowarski2000Apprentissage-et-Recherche, abstract = {Le but de la recherche documentaire (RD) consiste à trouver, parmi une base de documents, ceux qui répondent le mieux à une demande formulée par un utilisateur. Les expériences réalisées ont montré qu'il était impossible d'obtenir des résultats satisfaisants avec des systèmes où la représentation des documents ainsi que les paramètres utilisés lors de la recherche étaient figés. C'est pourquoi on emploie l'apprentissage pour modifier les paramètres du système de recherche, en se basant sur les jugements (feedback) que les utilisateurs peuvent porter sur la qualité des documents trouvés. Pourtant, l'application de telles techniques reste problématique. En effet, celles-ci sont soit complexes à mettre en oeuvre (traitement off-line), soit difficiles à contrôler. Nous proposons une approche est basée sur un modèle probabiliste de recherche documentaire qui permet d'utiliser le feedback de manière rapide et incrémentale. Dans cet article, nous étendons ce modèle pour ne plus regarder si un document répond à une requête dans l'absolu mais plutôt relativement à une base documentaire donnée. Ainsi, un jugement portant sur un seul document modifie l'ensemble du processus de recherche améliorant ainsi la rapidité de l'apprentissage. De plus, cette extension facilite notablement l'initialisation de la représentation des documents.}
, address = {Saint-Etienne, France}
, author = {
Benjamin
Piwowarski
}
, booktitle = {Colloque Francophone sur l'Apprentissage Automatique (CAP'2000)}
, keywords = {national conference}
, month = {June}
, title = {Apprentissage et Recherche Documentaire : une Approche Probabiliste Différentielle}
, type = {National conference}
, year = {2000}
}Abstract Le but de la recherche documentaire (RD) consiste à trouver, parmi une base de documents, ceux qui répondent le mieux à une demande formulée par un utilisateur. Les expériences réalisées ont montré qu'il était impossible d'obtenir des résultats satisfaisants avec des systèmes où la représentation des documents ainsi que les paramètres utilisés lors de la recherche étaient figés. C'est pourquoi on emploie l'apprentissage pour modifier les paramètres du système de recherche, en se basant sur les jugements (feedback) que les utilisateurs peuvent porter sur la qualité des documents trouvés. Pourtant, l'application de telles techniques reste problématique. En effet, celles-ci sont soit complexes à mettre en oeuvre (traitement off-line), soit difficiles à contrôler. Nous proposons une approche est basée sur un modèle probabiliste de recherche documentaire qui permet d'utiliser le feedback de manière rapide et incrémentale. Dans cet article, nous étendons ce modèle pour ne plus regarder si un document répond à une requête dans l'absolu mais plutôt relativement à une base documentaire donnée. Ainsi, un jugement portant sur un seul document modifie l'ensemble du processus de recherche améliorant ainsi la rapidité de l'apprentissage. De plus, cette extension facilite notablement l'initialisation de la représentation des documents. |