@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix dc: <http://purl.org/dc/terms/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

# The thesaurus itself: both an OWL ontology and a SKOS concept scheme.
<http://data.loterre.fr/ark:/67375/8LP>
  a owl:Ontology, skos:ConceptScheme .
# Concept: reinforcement learning from human feedback (RLHF).
# Predicates grouped conventionally: type, scheme membership, labels,
# hierarchy, examples, then administrative dates. Triple set is unchanged.
<http://data.loterre.fr/ark:/67375/8LP-Z2DL85DC-R>
  a skos:Concept ;
  skos:inScheme <http://data.loterre.fr/ark:/67375/8LP> ;
  skos:prefLabel
    "apprentissage par renforcement à partir de rétroaction humaine"@fr,
    "reinforcement learning from human feedback"@en ;
  skos:altLabel "RLHF"@en ;
  skos:hiddenLabel
    "Apprentissage par renforcement à partir de rétroaction humaine"@fr,
    "Reinforcement Learning from Human Feedback"@en ;
  skos:broader <http://data.loterre.fr/ark:/67375/8LP-B7HRCSC1-T> ;
  skos:example
    "The first step of RLHF is to obtain an initial LM which is usually trained with the flatten-and-concatenation-based modeling strategy-concatenate instruction input and all other resources (if they exist) into one input sequence and train the LM to generate the ground-truth output (as we have introduced before). (Lou, Zhang & Yin, 2024)"@en,
    "Meanwhile the performance of RLHF highly relies on the quality of its human preference annotations. (Lou, Zhang & Yin, 2024)"@en,
    "In this study we introduced the Token-Level Continuous Reward (TLCR) a novel reward model aimed at providing detailed token-based continuous rewards for Reinforcement Learning from Human Feedback (RLHF). (Yoon, Yoon, Eom, Han, Nam, Jo, On, Hasegawa-Johnson, Kim & Yoo, 2024)"@en,
    "The OpenAI GPT-series adopt RLHF to align the model's preference with human instructions where feedback supervision plays a big role. (Lou, Zhang & Yin, 2024)"@en,
    "One common method to reduce harmful outputs is reinforcement learning with human feedback (RLHF) (Zhan, Fang, Bindu, Gupta, Hashimoto & Kang, 2024)"@en ;
  dc:created "2024-10-10T15:09:12"^^xsd:dateTime ;
  dc:modified "2024-10-10T15:10:36"^^xsd:dateTime .

# Concept: reinforcement learning — broader concept of RLHF.
# NOTE(review): unlike its narrower concept, this one carries no
# skos:inScheme or dc: dates in the visible source; triples kept as-is.
<http://data.loterre.fr/ark:/67375/8LP-B7HRCSC1-T>
  a skos:Concept ;
  skos:prefLabel
    "apprentissage par renforcement"@fr,
    "reinforcement learning"@en ;
  skos:narrower <http://data.loterre.fr/ark:/67375/8LP-Z2DL85DC-R> .

