From 7a14f48bd7c6311605a489933adaa69791c5fc71 Mon Sep 17 00:00:00 2001
From: Louis Burke
Date: Mon, 16 Sep 2024 22:10:11 -0400
Subject: [PATCH] Added cue dsl

---
 dictionary.cue |  93 ++++++++++++++++++++++++++++++
 dsl.cue        | 149 +++++++++++++++++++++++++++++++++++++++++++++++++
 types.cue      |  64 +++++++++++++++++++++
 3 files changed, 306 insertions(+)
 create mode 100644 dictionary.cue
 create mode 100644 dsl.cue
 create mode 100644 types.cue

diff --git a/dictionary.cue b/dictionary.cue
new file mode 100644
index 0000000..268e518
--- /dev/null
+++ b/dictionary.cue
@@ -0,0 +1,93 @@
+package shenikan
+
+// Tips for finding related words: in Python, run
+//
+//   import gensim.downloader
+//   model = gensim.downloader.load("glove-wiki-gigaword-50") # or other models @https://github.com/piskvorky/gensim-data
+//
+// Then find words related to what you want in model[] and do math on them.
+// Then search through model to find nearby vectors using model.most_similar.
+// e.g. model.most_similar(model['aunt'] - model['woman'] + model['man'])
+
+dictionary: { // raw, hidden (_-prefixed) source tables; dsl.cue derives the exported lists from them
+	_glyphs: { // glyph kind -> orthography -> attribute list
+		vowel: "i": ["outer"]
+		vowel: "e": ["outer", "slashed"]
+		vowel: "a": ["slashed"]
+		vowel: "o": ["inner", "slashed"]
+		vowel: "u": ["inner"]
+		vowel: "y": ["both"] // a vowel glyph, but not listed in _vowels (types.cue)
+
+		cluster: "θ": ["left", "top"]
+		cluster: "∫": ["left", "middle"]
+		cluster: "x": ["left", "bottom"]
+		cluster: "n": ["center", "middle"]
+		cluster: "p": ["right", "top"]
+		cluster: "t": ["right", "middle"]
+		cluster: "k": ["right", "bottom"]
+		cluster: "θl": ["left", "top", "tall"]
+		cluster: "∫l": ["left", "middle", "tall"]
+		cluster: "xl": ["left", "bottom", "tall"]
+		cluster: "nl": ["center", "middle", "tall"]
+		cluster: "pl": ["right", "top", "tall"]
+		cluster: "tl": ["right", "middle", "tall"]
+		cluster: "kl": ["right", "bottom", "tall"]
+		cluster: "θr": ["left", "top", "wide"]
+		cluster: "∫r": ["left", "middle", "wide"]
+		cluster: "xr": ["left", "bottom", "wide"]
+		cluster: "nr": ["center", "middle", "wide"]
+		cluster: "pr": ["right", "top", "wide"]
+		cluster: "tr": ["right", "middle", "wide"]
+		cluster: "kr": ["right", "bottom", "wide"]
+		cluster: "sθ": ["left", "top", "both"]
+		cluster: "s∫": ["left", "middle", "both"]
+		cluster: "sx": ["left", "bottom", "both"]
+		cluster: "sn": ["center", "middle", "both"]
+		cluster: "sp": ["right", "top", "both"]
+		cluster: "st": ["right", "middle", "both"]
+		cluster: "sk": ["right", "bottom", "both"]
+
+		punctuation: "«": ["left"]
+		punctuation: ".": [] // no attributes
+		punctuation: "»": ["right"]
+	}
+
+	_dialects: { // dialect name -> {old: new} replacement pairs
+		"jukashenikan": {
+			"x": "ç"
+			"p": "j"
+		}
+
+		"gazhenigan": {
+			"k": "g"
+			"∫": "ʒ"
+			"s": "z"
+			"θ": "ð"
+			"t": "d"
+		}
+	}
+
+	_words: { // spelling -> part of speech -> definition (a string, or {short: long})
+		"t": {
+			noun: "thing": "See t - dmPenta for better meaning."
+			verb: "be"
+		}
+	}
+
+	_pentas: { // the single space in each key is replaced by a vowel in dsl.cue to spell a word
+		"n x": {
+			// name: "Pronouns"
+			extremes: {i: "Fully proximal", u: "Fully distal"}
+
+			i: pronoun: "I/me" // "n x" with i -> "nix"
+			o: {} // empty but non-null; presumably still exported as a word with no definitions (confirm)
+		}
+	}
+
+	_icosas: { // the two spaces in each key are replaced, in order, by a vowel pair in dsl.cue
+		"n t x": {
+			name: "Tests"
+			ii: noun: "foo" // "n t x" with i, i -> "nitix"
+		}
+	}
+}
diff --git a/dsl.cue b/dsl.cue
new file mode 100644
index 0000000..d8f2659
--- /dev/null
+++ b/dsl.cue
@@ -0,0 +1,149 @@
+package shenikan
+
+import ( "strings"
+
+	// Generate JSON output via `cue export -p shenikan`
+)
+
+dictionary: { // derives the exported lists (glyphs, dialects, words, pentas, icosas) from the hidden tables
+	_glyphs: vowel: [Orthography=_]: [...#VowelAttribute] | *null // every entry: attribute list, defaulting to null
+	_glyphs: cluster: [Orthography=_]: [...#ClusterAttribute] | *null
+	_glyphs: punctuation: [Orthography=_]: [...#PunctuationAttribute] | *null
+	_glyphs: numeric: [Orthography=_]: [...#NumericAttribute] | *null
+
+	glyphs: [ // one entry per (kind, orthography), then one "syllable" entry per vowel x cluster pair
+		for k, datum in _glyphs
+		for o, glyph in datum {
+			kind: k
+			ortho: o
+			attrs: glyph
+		},
+
+		for v, vglyph in _glyphs.vowel
+		for c, cglyph in _glyphs.cluster {
+			kind: "syllable"
+			ortho: c + v // cluster spelling first, then the vowel
+			attrs: vglyph + cglyph // NOTE(review): list "+" is deprecated in newer CUE (list.Concat); confirm the CUE version in use
+		},
+	]
+
+	_dialects: [Name=_]: [Old=_]: string // dialect name -> old spelling -> new spelling
+
+	dialects: [ // one {name, replacements} entry per dialect
+		for d, dialect in _dialects {
+			name: d
+			replacements: [
+				for o, n in dialect {
+					old: o
+					new: n
+				},
+			]
+		},
+	]
+
+	#RawDefinition: string | {[string]: string} | *"" // short text, or {short: long}; defaults to ""
+	_words: [Orthography=_]: {
+		[#PartOfSpeech]: #RawDefinition
+	}
+
+	// Usage: embed as { _defn: <raw definition>, {_#Definition} } to populate a proper definition.
+	_#Definition: {
+		_defn: string | {[string]: string}
+		short: _defn & string | [for brief, long in _defn {brief}][0] // the string itself, else the first key of the struct
+		_long: *([for brief, long in _defn {long}][0]) | null // first struct value; appears to fall back to null for a plain string (confirm)
+		if _long != null { // emit long only when one was found
+			long: _long
+		}
+	}
+
+	words: [ // plain words, then penta words (one vowel), then icosa words (two vowels)
+		for o, word in _words {
+			spelling: o
+			definitions: [
+				for p, defn in word {
+					_defn: defn
+					pos: p
+					{_#Definition}
+				},
+			]
+		},
+
+		for o, penta in _pentas
+		for v in _vowels
+		if penta["\(v)"] != null { // skip vowel slots left at their null default
+			spelling: strings.Replace(o, " ", v, 1) // fill the first space in the key with the vowel
+			definitions: [
+				for p, defn in penta["\(v)"] {
+					_defn: defn
+					pos: p
+					{_#Definition}
+				},
+			]
+		},
+
+		for o, icosa in _icosas
+		for v1 in _vowels
+		for v2 in _vowels
+		if icosa["\(v1)\(v2)"] != null { // skip vowel-pair slots left at their null default
+			spelling: strings.Replace(strings.Replace(o, " ", v1, 1), " ", v2, 1) // first space <- v1, then next space <- v2
+			definitions: [
+				for p, defn in icosa["\(v1)\(v2)"] {
+					_defn: defn
+					pos: p
+					{_#Definition}
+				},
+			]
+		},
+	]
+
+	_pentas: [Orthography=_]: { // schema for each raw penta entry
+		name?: string
+		extremes: {
+			i?: string
+			u?: string
+		}
+		for v in _vowels { // one slot per vowel in _vowels, null unless defined
+			"\(v)": {[#PartOfSpeech]: #RawDefinition} | *null
+		}
+	}
+
+	pentas: [ // exports only name and extremes; the key (o) is not emitted
+		for o, penta in _pentas {
+			if penta.name != _|_ {name: penta.name}
+			if penta.extremes != _|_ {
+				extremes: {
+					if penta.extremes.i != _|_ {i: penta.extremes.i}
+					if penta.extremes.u != _|_ {u: penta.extremes.u}
+				}
+			}
+		},
+	]
+
+	_icosas: [Orthography=_]: { // schema for each raw icosa entry
+		name?: string
+		extremes: {
+			ix?: string
+			ux?: string
+			xi?: string
+			xu?: string
+		}
+		for v1 in _vowels
+		for v2 in _vowels { // one slot per ordered vowel pair, null unless defined
+			"\(v1)\(v2)": {[#PartOfSpeech]: #RawDefinition} | *null
+		}
+	}
+
+	icosas: [ // exports only name and extremes; the key (o) is not emitted
+		for o, icosa in _icosas {
+			if icosa.name != _|_ {name: icosa.name}
+			if icosa.extremes != _|_ {
+				extremes: {
+					if icosa.extremes.ix != _|_ {ix: icosa.extremes.ix}
+					if icosa.extremes.ux != _|_ {ux: icosa.extremes.ux}
+					if icosa.extremes.xi != _|_ {xi: icosa.extremes.xi}
+					if icosa.extremes.xu != _|_ {xu: icosa.extremes.xu}
+				}
+			}
+		},
+	]
+}
diff --git a/types.cue b/types.cue
new file mode 100644
index 0000000..c9aeaf2
--- /dev/null
+++ b/types.cue
@@ -0,0 +1,64 @@
+package shenikan
+
+_vowels: ["i", "e", "a", "o", "u"] // vowels used to expand penta/icosa keys in dsl.cue; "y" is not included
+#VowelAttribute: "outer" | "slashed" | "inner" | "both"
+#ClusterAttribute: "left" | "center" | "right" | "top" | "middle" | "bottom" |
+	"tall" | "wide" | "both"
+#PunctuationAttribute: "left" | "right"
+#NumericAttribute: "circle" | "dash" | "vee" | "hump" | "dot"
+#SyllableAttribute: #VowelAttribute | #ClusterAttribute // a syllable carries vowel and cluster attributes
+
+#Glyph: #VowelGlyph | #ClusterGlyph | #PunctuationGlyph | #NumericGlyph |
+	#SyllableGlyph
+
+#VowelGlyph: {kind: "vowel", ortho: string, attrs: [...#VowelAttribute]}
+#ClusterGlyph: {kind: "cluster", ortho: string, attrs: [...#ClusterAttribute]}
+#PunctuationGlyph: {kind: "punctuation", ortho: string, attrs: [...#PunctuationAttribute]}
+#NumericGlyph: {kind: "numeric", ortho: string, attrs: [...#NumericAttribute]}
+#SyllableGlyph: {kind: "syllable", ortho: string, attrs: [...#SyllableAttribute]}
+
+#Replacement: { // one old -> new spelling substitution
+	old: string
+	new: string
+}
+#Dialect: { // a named list of replacements
+	name: string
+	replacements: [...#Replacement]
+}
+
+#PartOfSpeech: "noun" | "pronoun" | "verb" | "adjective" | "adverb" | "syntax"
+#Definition: { // exported form of one definition; long is optional
+	pos: #PartOfSpeech
+	short: string
+	long?: string
+}
+#Word: { // a spelling with its definitions
+	spelling: string
+	definitions: [...#Definition]
+}
+
+#Penta: { // exported penta metadata: optional name and extremes only
+	name?: string
+	extremes?: {
+		i?: string
+		u?: string
+	}
+}
+
+#Icosa: { // exported icosa metadata: optional name and extremes only
+	name?: string
+	extremes?: {
+		ix?: string
+		ux?: string
+		xi?: string
+		xu?: string
+	}
+}
+
+dictionary: { // schema that the exported lists built in dsl.cue must satisfy
+	glyphs: [...#Glyph]
+	dialects: [...#Dialect]
+	words: [...#Word]
+	pentas: [...#Penta]
+	icosas: [...#Icosa]
+}