File size: 4,210 Bytes
19cafec
 
 
 
 
 
 
 
 
 
 
 
ec0626f
 
 
 
 
 
19cafec
ec0626f
19cafec
 
 
 
 
 
 
 
 
 
ec0626f
 
 
19cafec
 
ec0626f
19cafec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec0626f
 
 
 
19cafec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec0626f
19cafec
 
 
 
 
ec0626f
19cafec
 
 
 
ec0626f
19cafec
 
 
 
 
 
ec0626f
19cafec
 
 
 
 
 
ec0626f
19cafec
 
 
 
 
ec0626f
19cafec
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import Lean
import Mathlib.Lean.Expr.Basic
import Mathlib.Lean.CoreM
import Batteries.Lean.Util.Path
import Batteries.Data.String.Matcher

open Lean
open Lean.Meta
open Lean.Elab
open Lean.Elab.Command

/-
This small file is modified from: https://github.com/semorrison/lean-training-data
You can refer to the file structure there for more details.
To run this, there is no need to download another 4GB of Mathlib files.
Just prepare a Lean project with Mathlib installed. Suppose the project is in the folder LeanProj.
Your Mathlib files should live in LeanProj/.lake/packages/mathlib.
My LeanProj/lean-toolchain says I am using version: leanprover/lean4:v4.9.0-rc2

Now, put this extract.lean in LeanProj/scripts/extract.lean

In LeanProj/lakefile.lean, below
@[default_target]
lean_lib «LeanProj» where

add this:
@[default_target]
lean_exe extract where
  root := `scripts.extract
  supportInterpreter := true

This will make the `lake exe extract` command available,
and enable the correct `compile_time_search_path%` in the script.

Now in the terminal, run:
cd path-to-LeanProj
mkdir data
lake exe extract > data/theorems.txt

The stdout (IO.println) will be directed to data/theorems.txt,
The stderr (IO.eprintln) will be shown in the terminal. You will see the progress. It takes about 15 minutes.
-/

/--
Helps print kinds of constants
and distinguish between theorems, definitions, axioms, etc.
-/
def Lean.ConstantInfo.kind : ConstantInfo → String
  | .axiomInfo  _ => "axiom"
  | .defnInfo   _ => "def"
  | .thmInfo    _ => "theorem"
  | .opaqueInfo _ => "opaque"
  | .quotInfo   _ => "quot" -- Not sure what this is!
  | .inductInfo _ => "inductive"
  | .ctorInfo   _ => "constructor"
  | .recInfo    _ => "recursor"

def truncatePrint (s : String) : IO Unit := do
  IO.println (if s.length < 1000 then s else s.take 997 ++ " ...")

def main : IO Unit := do
  IO.eprintln "\n\n"
  IO.eprintln "importing modules"
  searchPathRef.set compile_time_search_path%
  CoreM.withImportModules #[`Mathlib] (options := {entries := [
    -- (`maxHeartbeats, .ofNat 0),
    (`smartUnfolding, .ofBool false),
  ]}) do
    IO.eprintln s!"simplification timeout: {← getMaxHeartbeats}"
    let env ← getEnv

    IO.eprintln "filtering out unwanted objects"
    let mut x := 0
    let mut lst := #[]
    lst := lst ++ env.constants.map₁.toArray
    lst := lst ++ env.constants.map₂.toArray
    lst := lst.filter fun (name, info) => (
      !name.isInternalOrNum -- s.startsWith "_" | .num
      && !name.isInaccessibleUserName -- s.contains '✝' || s == "_inaccessible"
      && !name.isImplementationDetail  -- s.startsWith "__"
      && !(name.toString.findSubstr? ".proof_" |>.isSome)
      && !(name.toString.findSubstr? ".match_" |>.isSome)
    )

      -- name.toString.startsWith "Nat.add"
    IO.eprintln s!"printing {lst.size} theorems"
    for (name, info) in lst do
      IO.println "----thm----"
      -- print module.
      -- eg. "import Mathlib.Data.Real.Basic"
      match (← findModuleOf? name) with
      | .none => IO.println "unknown"
      | .some mod => IO.println s!"import {mod}"
      IO.println "--thm--"

      -- print kine and decl name
      -- eg. "theorem Nat.add_comm" or "def Nat.add"
      IO.println s!"{info.kind} {name}"
      IO.println "--thm--"

      -- print decl type.
      -- eg. "∀ (n m : Nat), n + m = m + n"
      -- ppExpr is better than dbgToString
      let ppType := toString (← MetaM.run' do ppExpr info.type)
      truncatePrint ppType
      IO.println "--thm--"

      -- print doc string if exists.
      -- eg. O(|l| + |r|). Merge two lists using s as a switch.
      match (← findDocString? env name) with
      | .none => IO.println ""
      | .some doc => IO.println doc
      -- IO.println "--thm--"

      -- print decl value. These Lean.Expr are very large.
      -- match info.value? with
      -- | .none => IO.println ""
      -- | .some val => IO.println (toString (← MetaM.run' do ppExpr val))
      -- IO.println "--thm--"

      x := x + 1
      if x % 100 == 0 then
        IO.eprintln s!"printed {x} / {lst.size} theorems, {x.toFloat / lst.size.toFloat * 100}%"
      -- if x ≥ 2 then break
    IO.eprintln "done"